@article{Hazman:30May2009:1744-2621:24,
  abstract  = {Ontologies play a vital role in many web- and internet-related applications. This work presents a system for accelerating the ontology building process via semi-automatically learning a hierarchal ontology given a set of domain-specific web documents and a set of seed concepts. The methods are tested with web documents in the domain of agriculture. The ontology is constructed through the use of two complementary approaches. The presented system has been used to build an ontology in the agricultural domain using a set of Arabic extension documents and evaluated against a modified version of the AGROVOC ontology.},
  author    = {Hazman, Maryam and El-Beltagy, Samhaa R. and Rafea, Ahmed},
  doi       = {10.1504/IJMSO.2009.026251},
  interhash = {fe27d687bcba91a7a6fe51eec9a2b87d},
  intrahash = {323c8bdedc8a4643232a498ac03d6407},
  journal   = {International Journal of Metadata, Semantics and Ontologies},
  pages     = {24--33},
  title     = {Ontology Learning from Domain Specific Web Documents},
  url       = {http://www.ingentaconnect.com/content/ind/ijmso/2009/00000004/F0020001/art00003},
  volume    = {4},
  year      = {2009}
}

@article{girju2006automatic,
  abstract  = {An important problem in knowledge discovery from text is the automatic extraction of semantic relations. This paper presents a supervised, semantically intensive, domain independent approach for the automatic detection of part--whole relations in text. First an algorithm is described that identifies lexico-syntactic patterns that encode part--whole relations. A difficulty is that these patterns also encode other semantic relations, and a learning method is necessary to discriminate whether or not a pattern contains a part--whole relation. A large set of training examples have been annotated and fed into a specialized learning system that learns classification rules. The rules are learned through an iterative semantic specialization (ISS) method applied to noun phrase constituents. Classification rules have been generated this way for different patterns such as genitives, noun compounds, and noun phrases containing prepositional phrases to extract part--whole relations from them. The applicability of these rules has been tested on a test corpus obtaining an overall average precision of 80.95% and recall of 75.91%. The results demonstrate the importance of word sense disambiguation for this task. They also demonstrate that different lexico-syntactic patterns encode different semantic information and should be treated separately in the sense that different clarification rules apply to different patterns.},
  author    = {Girju, Roxana and Badulescu, Adriana and Moldovan, Dan I.},
  doi       = {10.1162/coli.2006.32.1.83},
  file      = {girju2006automatic.pdf:girju2006automatic.pdf:PDF},
  groups    = {public},
  interhash = {e3b517e5895171e35375ce08d632d738},
  intrahash = {ce346613f91431251a6fe867f4360378},
  journal   = {Computational Linguistics},
  journalpub = {1},
  number    = {1},
  pages     = {83--135},
  timestamp = {2010-10-25 15:08:53},
  title     = {Automatic Discovery of Part-Whole Relations},
  url       = {http://dblp.uni-trier.de/db/journals/coling/coling32.html#GirjuBM06},
  username  = {dbenz},
  volume    = {32},
  year      = {2006}
}

@inproceedings{snow2006semantic,
  abstract  = {We propose a novel algorithm for inducing semantic taxonomies. Previous algorithms for taxonomy induction have typically focused on independent classifiers for discovering new single relationships based on hand-constructed or automatically discovered textual patterns. By contrast, our algorithm flexibly incorporates evidence from multiple classifiers over heterogenous relationships to optimize the entire structure of the taxonomy, using knowledge of a word's coordinate terms to help in determining its hypernyms, and vice versa. We apply our algorithm on the problem of sense-disambiguated noun hyponym acquisition, where we combine the predictions of hypernym and coordinate term classifiers with the knowledge in a preexisting semantic taxonomy (WordNet 2.1). We add 10,000 novel synsets to WordNet 2.1 at 84% precision, a relative error reduction of 70% over a non-joint algorithm using the same component classifiers. Finally, we show that a taxonomy built using our algorithm shows a 23% relative F-score improvement over WordNet 2.1 on an independent testset of hypernym pairs.},
  author    = {Snow, Rion and Jurafsky, Daniel and Ng, Andrew Y.},
  booktitle = {ACL},
  crossref  = {conf/acl/2006},
  ee        = {http://acl.ldc.upenn.edu/P/P06/P06-1101.pdf},
  file      = {snow2006semantic.pdf:snow2006semantic.pdf:PDF},
  groups    = {public},
  interhash = {c0f5a3a22faa8dc4b61c9a717a6c9037},
  intrahash = {8f39e7ac43a97719c5a746da02dbd964},
  publisher = {The Association for Computer Linguistics},
  timestamp = {2010-10-25 15:06:10},
  title     = {Semantic Taxonomy Induction from Heterogenous Evidence},
  url       = {http://dblp.uni-trier.de/db/conf/acl/acl2006.html#SnowJN06},
  username  = {dbenz},
  year      = {2006}
}

@inproceedings{sorg2008cirwesa,
  author    = {Sorg, Philipp and Cimiano, Philipp},
  booktitle = {Working Notes for the {CLEF} 2008 Workshop},
  evastar_pdf = {2008_1837_Sorg_Cross-lingual_I_1.pdf},
  interhash = {7063c5bdff3528939877aa07ae4a35de},
  intrahash = {27c1c374a750725824118ac02ba5f2c6},
  title     = {Cross-lingual Information Retrieval with Explicit Semantic Analysis},
  url       = {http://www.aifb.kit.edu/images/7/7c/2008_1837_Sorg_Cross-lingual_I_1.pdf},
  year      = {2008}
}

@inproceedings{conf/ecai/HjelmB08,
  author    = {Hjelm, Hans and Buitelaar, Paul},
  booktitle = {ECAI},
  crossref  = {conf/ecai/2008},
  doi       = {10.3233/978-1-58603-891-5-288},
  editor    = {Ghallab, Malik and Spyropoulos, Constantine D. and Fakotakis, Nikos and Avouris, Nikolaos M.},
  interhash = {21a658154fb1a02e773b7a678b15f9f4},
  intrahash = {813903a333a40ecf9a59ded552acb323},
  isbn      = {978-1-58603-891-5},
  pages     = {288--292},
  publisher = {IOS Press},
  series    = {Frontiers in Artificial Intelligence and Applications},
  title     = {Multilingual Evidence Improves Clustering-based Taxonomy Extraction},
  url       = {http://www.ling.su.se/staff/hans/artiklar/ecai2008-hjelm-buitelaar.pdf},
  volume    = {178},
  year      = {2008}
}

@article{journals/www/EdaYUU09,
  author    = {Eda, Takeharu and Yoshikawa, Masatoshi and Uchiyama, Toshio and Uchiyama, Tadasu},
  doi       = {10.1007/s11280-009-0069-1},
  interhash = {a560796c977bc7582017f662bf88c16d},
  intrahash = {ec3c256e7d1f24cd9d407d3ce7e41d96},
  journal   = {World Wide Web},
  number    = {4},
  pages     = {421--440},
  title     = {The Effectiveness of Latent Semantic Analysis for Building Up a Bottom-up Taxonomy from Folksonomy Tags},
  url       = {http://dblp.uni-trier.de/db/journals/www/www12.html#EdaYUU09},
  volume    = {12},
  year      = {2009}
}

@inproceedings{benz2010semantics,
  address   = {Raleigh, NC, USA},
  author    = {Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  booktitle = {Proceedings of the 2nd Web Science Conference (WebSci10)},
  interhash = {dbd2ac30cfb0faa29413275afc9b4387},
  intrahash = {ba43b0db4b8f7cb091fd55d59e170477},
  title     = {Semantics Made by You and Me: Self-emerging Ontologies Can Capture the Diversity of Shared Knowledge},
  year      = {2010}
}

@inproceedings{Van-Damme2007,
  address   = {Innsbruck},
  author    = {Van Damme, C{\'e}line and Hepp, Martin and Siorpaes, Katharina},
  booktitle = {Bridging the Gap between Semantic Web and Web 2.0 (SemNet 2007)},
  date-added = {2009-08-17 11:40:57 +0200},
  date-modified = {2010-01-04 09:30:08 +0100},
  interhash = {c8d1bcaa606229417f1c3f0f27c5f0e0},
  intrahash = {8d57c1e57c7aba60acb767e3d5b0fa13},
  pages     = {57--70},
  title     = {{FolksOntology}: An Integrated Approach for Turning Folksonomies into Ontologies},
  url       = {http://www.kde.cs.uni-kassel.de/ws/eswc2007/proc/ProceedingsSemnet07.pdf},
  urldate   = {2008-05-28},
  year      = {2007}
}

@inproceedings{Zhou/2007/Unsupervised,
  abstract  = {This paper deals with the problem of exploring hierarchical semantics from social annotations. Recently, social annotation services have become more and more popular in Semantic Web. It allows users to arbitrarily annotate web resources, thus, largely lowers the barrier to cooperation. Furthermore, through providing abundant meta-data resources, social annotation might become a key to the development of Semantic Web. However, on the other hand, social annotation has its own apparent limitations, for instance, 1) ambiguity and synonym phenomena and 2) lack of hierarchical information. In this paper, we propose an unsupervised model to automatically derive hierarchical semantics from social annotations. Using a social bookmark service Del.icio.us as example, we demonstrate that the derived hierarchical semantics has the ability to compensate those shortcomings. We further apply our model on another data set from Flickr to testify our model's applicability on different environments. The experimental results demonstrate our model's efficiency.},
  address   = {Berlin, Heidelberg},
  author    = {Zhou, Mianwei and Bao, Shenghua and Wu, Xian and Yu, Yong},
  booktitle = {Proceedings of the 6th International Semantic Web Conference and 2nd Asian Semantic Web Conference (ISWC/ASWC2007), Busan, South Korea},
  editor    = {Aberer, Karl and Choi, Key-Sun and Noy, Natasha and Allemang, Dean and Lee, Kyung-Il and Nixon, Lyndon J. B. and Golbeck, Jennifer and Mika, Peter and Maynard, Diana and Schreiber, Guus and Cudr{\'e}-Mauroux, Philippe},
  interhash = {af21595ee9f4a13b5e651ad049f31262},
  intrahash = {355fcbb32255f3ba5f41819c00c520ba},
  month     = nov,
  pages     = {673--686},
  publisher = {Springer Verlag},
  series    = {LNCS},
  title     = {An Unsupervised Model for Exploring Hierarchical Semantics from Social Annotations},
  url       = {http://iswc2007.semanticweb.org/papers/673.pdf},
  volume    = {4825},
  year      = {2007}
}

@inproceedings{plangprasopchok2009,
  abstract  = {Automatic folksonomy construction from tags has attracted much attention recently. However, inferring hierarchical relations between concepts from tags has a drawback in that it is difficult to distinguish between more popular and more general concepts. Instead of tags we propose to use user-specified relations for learning folksonomy. We explore two statistical frameworks for aggregating many shallow individual hierarchies, expressed through the collection/set relations on the social photosharing site Flickr, into a common deeper folksonomy that reflects how a community organizes knowledge. Our approach addresses a number of challenges that arise while aggregating information from diverse users, namely noisy vocabulary, and variations in the granularity level of the concepts expressed. Our second contribution is a method for automatically evaluating learned folksonomy by comparing it to a reference taxonomy, e.g., the Web directory created by the Open Directory Project. Our empirical results suggest that user-specified relations are a good source of evidence for learning folksonomies.},
  address   = {New York, NY, USA},
  author    = {Plangprasopchok, A. and Lerman, K.},
  booktitle = {WWW '09: Proceedings of the 18th International Conference on World Wide Web},
  doi       = {10.1145/1526709.1526814},
  interhash = {fccd894a82edb040d7438d6da91e3ebe},
  intrahash = {559ee9d48f1a510f56765b2357aa8ea5},
  isbn      = {978-1-60558-487-4},
  location  = {Madrid, Spain},
  pages     = {781--790},
  publisher = {ACM},
  title     = {Constructing Folksonomies from User-Specified Relations on {Flickr}},
  url       = {http://www2009.org/proceedings/pdf/p781.pdf},
  year      = {2009}
}

@inproceedings{brank2005,
  author    = {Brank, Janez and Grobelnik, Marko and Mladeni{\'c}, Dunja},
  booktitle = {Proc. of 8th Int. multi-conf. Information Society},
  interhash = {394d7ea166cc0745dc8682a65975648c},
  intrahash = {8c910a2d3f6708b23e03e06ff843c8a8},
  pages     = {166--169},
  title     = {A Survey of Ontology Evaluation Techniques},
  year      = {2005}
}

@incollection{Voelker2008,
  author    = {V{\"o}lker, J. and Haase, P. and Hitzler, P.},
  booktitle = {Ontology Learning and Population: Bridging the Gap between Text and Knowledge},
  interhash = {cf638e52ae5c4f9af9d35d8faee72c16},
  intrahash = {f80af45b8659db1a4327a5ce1df3f267},
  owner     = {blev},
  publisher = {IOS Press},
  series    = {Frontiers in Artificial Intelligence and Applications},
  timestamp = {2009.02.22},
  title     = {Learning Expressive Ontologies},
  year      = {2008}
}

@inproceedings{5273871,
  abstract  = {This paper introduces WikiOnto: a system that assists in the extraction and modeling of topic ontologies in a semi-automatic manner using a preprocessed document corpus derived from Wikipedia. Based on the Wikipedia XML Corpus, we present a three-tiered framework for extracting topic ontologies in quick time and a modeling environment to refine these ontologies. Using natural language processing (NLP) and other machine learning (ML) techniques along with a very rich document corpus, this system proposes a solution to a task that is generally considered extremely cumbersome. The initial results of the prototype suggest strong potential of the system to become highly successful in ontology extraction and modeling and also inspire further research on extracting ontologies from other semi-structured document corpora as well.},
  author    = {De Silva, L. and Jayaratne, L.},
  booktitle = {Second International Conference on the Applications of Digital Information and Web Technologies ({ICADIWT} 2009)},
  doi       = {10.1109/ICADIWT.2009.5273871},
  interhash = {c1996cb9e69de56e2bb2f8e763fe0482},
  intrahash = {66bec053541e521fbe68c0119806ae49},
  month     = aug,
  pages     = {446--451},
  title     = {Semi-Automatic Extraction and Modeling of Ontologies Using {Wikipedia} {XML} Corpus},
  url       = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?isnumber=5273826&arnumber=5273871&count=156&index=116},
  year      = {2009}
}

@article{ieKey,
  author    = {Cimiano, Philipp and V{\"o}lker, Johanna and Studer, Rudi},
  interhash = {0007185516cf6c93931a11bc13c55f3f},
  intrahash = {bd70c98a41d8cc01464dd022dfd118b6},
  journal   = {Information, Wissenschaft und Praxis},
  month     = oct,
  number    = {6-7},
  pages     = {315--320},
  title     = {Ontologies on Demand? -- A Description of the State-of-the-Art, Applications, Challenges and Trends for Ontology Learning from Text Information},
  url       = {http://www.aifb.uni-karlsruhe.de/Publikationen/showPublikation?publ_id=1282},
  volume    = {57},
  year      = {2006}
}

@phdthesis{Brewster_Knowledge08,
  author    = {Brewster, Christopher},
  interhash = {6f7167a562cc03e6ca09cd73ad452a4f},
  intrahash = {48f029a926318103b1a6e24426e9f2c7},
  school    = {Department of Computer Science, University of Sheffield},
  title     = {Mind the Gap: Bridging from Text to Ontological Knowledge},
  year      = {2008}
}

@article{Brewster:2009:BMC-Bioinformatics:19426458,
  abstract  = {BACKGROUND: Ontology construction for any domain is a labour intensive and complex process. Any methodology that can reduce the cost and increase efficiency has the potential to make a major impact in the life sciences. This paper describes an experiment in ontology construction from text for the animal behaviour domain. Our objective was to see how much could be done in a simple and relatively rapid manner using a corpus of journal papers. We used a sequence of pre-existing text processing steps, and here describe the different choices made to clean the input, to derive a set of terms and to structure those terms in a number of hierarchies. We describe some of the challenges, especially that of focusing the ontology appropriately given a starting point of a heterogeneous corpus. RESULTS: Using mainly automated techniques, we were able to construct an 18055 term ontology-like structure with 73% recall of animal behaviour terms, but a precision of only 26%. We were able to clean unwanted terms from the nascent ontology using lexico-syntactic patterns that tested the validity of term inclusion within the ontology. We used the same technique to test for subsumption relationships between the remaining terms to add structure to the initially broad and shallow structure we generated. All outputs are available at http://thirlmere.aston.ac.uk/\~{}kiffer/animalbehaviour/. CONCLUSION: We present a systematic method for the initial steps of ontology or structured vocabulary construction for scientific domains that requires limited human effort and can make a contribution both to ontology learning and maintenance. The method is useful both for the exploration of a scientific domain and as a stepping stone towards formally rigourous ontologies. The filtering of recognised terms from a heterogeneous corpus to focus upon those that are the topic of the ontology is identified to be one of the main challenges for research in ontology learning.},
  author    = {Brewster, C and Jupp, S and Luciano, J and Shotton, D and Stevens, R D and Zhang, Z},
  doi       = {10.1186/1471-2105-10-S5-S1},
  interhash = {f4b4e74631a837df6c3d102731ec46c3},
  intrahash = {e9a83a729df52557d560ad98404774c3},
  journal   = {BMC Bioinformatics},
  pmid      = {19426458},
  title     = {Issues in Learning an Ontology from Text},
  url       = {http://www.ncbi.nlm.nih.gov/pubmed/19426458},
  volume    = {10 Suppl 5},
  year      = {2009}
}

@inproceedings{linguistically2009,
  abstract  = {In this paper we argue why it is necessary to associate linguistic information with ontologies and why more expressive models, beyond RDFS, OWL and SKOS, are needed to capture the relation between natural language constructs on the one hand and ontological entities on the other. We argue that in the light of tasks such as ontology-based information extraction, ontology learning and population from text and natural language generation from ontologies, currently available datamodels are not sufficient as they only allow to associate atomic terms without linguistic grounding or structure to ontology elements. Towards realizing a more expressive model for associating linguistic information to ontology elements, we base our work presented here on previously developed models (LingInfo, LexOnto, LMF) and present a new joint model for linguistic grounding of ontologies called LexInfo. LexInfo combines essential design aspects of LingInfo and LexOnto and builds on a sound model for representing computational lexica called LMF which has been recently approved as a standard under ISO.},
  author    = {Buitelaar, Paul and Cimiano, Philipp and Haase, Peter and Sintek, Michael},
  booktitle = {6th Annual European Semantic Web Conference (ESWC2009)},
  interhash = {28225358e49213819fa071deb62c4aa1},
  intrahash = {8213d4d08414fd60fe86e59d41895d4e},
  month     = jun,
  pages     = {111--125},
  title     = {Towards Linguistically Grounded Ontologies},
  url       = {http://www.cimiano.de/Publications/2009/eswc09/eswc09.pdf},
  year      = {2009}
}

@phdthesis{david2007domain,
  address   = {Saarbr{\"u}cken},
  author    = {S{\'a}nchez, David},
  interhash = {997c9d66a7ef8844f410919ccecc4768},
  intrahash = {504c0b73b391933fb0536b135144ae1d},
  isbn      = {9783836470698 3836470691},
  publisher = {VDM Verlag Dr. M{\"u}ller},
  refid     = {426144281},
  title     = {Domain Ontology Learning from the Web: An Unsupervised, Automatic and Domain Independent Approach},
  url       = {http://www.worldcat.org/search?qt=worldcat_org_all&q=3836470691},
  year      = {2007}
}

@article{1672980,
  abstract  = {Domain ontologies play an important role in supporting knowledge-based applications in the Semantic Web. To facilitate the building of ontologies, text mining techniques have been used to perform ontology learning from texts. However, traditional systems employ shallow natural language processing techniques and focus only on concept and taxonomic relation extraction. In this paper we present a system, known as Concept-Relation-Concept Tuple-based Ontology Learning (CRCTOL), for mining ontologies automatically from domain-specific documents. Specifically, CRCTOL adopts a full text parsing technique and employs a combination of statistical and lexico-syntactic methods, including a statistical algorithm that extracts key concepts from a document collection, a word sense disambiguation algorithm that disambiguates words in the key concepts, a rule-based algorithm that extracts relations between the key concepts, and a modified generalized association rule mining algorithm that prunes unimportant relations for ontology learning. As a result, the ontologies learned by CRCTOL are more concise and contain a richer semantics in terms of the range and number of semantic relations compared with alternative systems. We present two case studies where CRCTOL is used to build a terrorism domain ontology and a sport event domain ontology. At the component level, quantitative evaluation by comparing with Text-To-Onto and its successor Text2Onto has shown that CRCTOL is able to extract concepts and semantic relations with a significantly higher level of accuracy. At the ontology level, the quality of the learned ontologies is evaluated by either employing a set of quantitative and qualitative methods including analyzing the graph structural property, comparison to WordNet, and expert rating, or directly comparing with a human-edited benchmark ontology, demonstrating the high quality of the ontologies learned. {\copyright} 2010 Wiley Periodicals, Inc.},
  address   = {New York, NY, USA},
  author    = {Jiang, Xing and Tan, Ah-Hwee},
  doi       = {10.1002/asi.v61:1},
  interhash = {613b2a27ee0e45e20a8bfea6de35ec00},
  intrahash = {e44d2fd55b85cc56b08afd134f437012},
  issn      = {1532-2882},
  journal   = {Journal of the American Society for Information Science and Technology},
  number    = {1},
  pages     = {150--168},
  publisher = {John Wiley \& Sons, Inc.},
  title     = {{CRCTOL}: A Semantic-Based Domain Ontology Learning System},
  url       = {http://portal.acm.org/citation.cfm?id=1672957.1672980&coll=portal&dl=ACM},
  volume    = {61},
  year      = {2010}
}

@article{1179190,
  abstract  = {Our OntoLearn system is an infrastructure for automated ontology learning from domain text. It is the only system, as far as we know, that uses natural language processing and machine learning techniques, and is part of a more general ontology engineering architecture. We describe the system and an experiment in which we used a machine-learned tourism ontology to automatically translate multiword terms from English to Italian. The method can apply to other domains without manual adaptation.},
  author    = {Navigli, R. and Velardi, P. and Gangemi, A.},
  doi       = {10.1109/MIS.2003.1179190},
  interhash = {a3f49b0b169c0e2b6ea1e3ed7e87f957},
  intrahash = {d70653a1a21b9e84904def9d2fdb5151},
  issn      = {1541-1672},
  journal   = {IEEE Intelligent Systems},
  month     = jan # {--} # feb,
  number    = {1},
  pages     = {22--31},
  title     = {Ontology Learning and Its Application to Automated Terminology Translation},
  url       = {http://ieeexplore.ieee.org/search/wrapper.jsp?arnumber=1179190},
  volume    = {18},
  year      = {2003}
}