@article{Hazman:30May2009:1744-2621:24, abstract = {Ontologies play a vital role in many web- and internet-related applications. This work presents a system for accelerating the ontology building process via semi-automatically learning a hierarchical ontology given a set of domain-specific web documents and a set of seed concepts. The methods are tested with web documents in the domain of agriculture. The ontology is constructed through the use of two complementary approaches. The presented system has been used to build an ontology in the agricultural domain using a set of Arabic extension documents and evaluated against a modified version of the AGROVOC ontology.}, author = {Hazman, Maryam and El-Beltagy, Samhaa R. and Rafea, Ahmed}, doi = {10.1504/IJMSO.2009.026251}, interhash = {fe27d687bcba91a7a6fe51eec9a2b87d}, intrahash = {323c8bdedc8a4643232a498ac03d6407}, journal = {International Journal of Metadata, Semantics and Ontologies}, pages = {24--33}, title = {Ontology learning from domain specific web documents}, url = {http://www.ingentaconnect.com/content/ind/ijmso/2009/00000004/F0020001/art00003}, volume = 4, year = 2009 }

@inproceedings{conf/ecai/HjelmB08, author = {Hjelm, Hans and Buitelaar, Paul}, booktitle = {ECAI}, crossref = {conf/ecai/2008}, editor = {Ghallab, Malik and Spyropoulos, Constantine D. and Fakotakis, Nikos and Avouris, Nikolaos M.}, ee = {http://dx.doi.org/10.3233/978-1-58603-891-5-288}, interhash = {21a658154fb1a02e773b7a678b15f9f4}, intrahash = {813903a333a40ecf9a59ded552acb323}, isbn = {978-1-58603-891-5}, pages = {288--292}, publisher = {IOS Press}, series = {Frontiers in Artificial Intelligence and Applications}, title = {Multilingual Evidence Improves Clustering-based Taxonomy Extraction}, url = {http://www.ling.su.se/staff/hans/artiklar/ecai2008-hjelm-buitelaar.pdf}, volume = 178, year = 2008 }

@inproceedings{plangprasopchok2009, abstract = {Automatic folksonomy construction from tags has attracted much attention recently. However, inferring hierarchical relations between concepts from tags has a drawback in that it is difficult to distinguish between more popular and more general concepts. Instead of tags we propose to use user-specified relations for learning folksonomy. We explore two statistical frameworks for aggregating many shallow individual hierarchies, expressed through the collection/set relations on the social photosharing site Flickr, into a common deeper folksonomy that reflects how a community organizes knowledge. Our approach addresses a number of challenges that arise while aggregating information from diverse users, namely noisy vocabulary, and variations in the granularity level of the concepts expressed. Our second contribution is a method for automatically evaluating learned folksonomy by comparing it to a reference taxonomy, e.g., the Web directory created by the Open Directory Project. Our empirical results suggest that user-specified relations are a good source of evidence for learning folksonomies.}, address = {New York, NY, USA}, author = {Plangprasopchok, A.
and Lerman, K.}, booktitle = {WWW '09: Proceedings of the 18th International Conference on World Wide Web}, doi = {10.1145/1526709.1526814}, interhash = {fccd894a82edb040d7438d6da91e3ebe}, intrahash = {559ee9d48f1a510f56765b2357aa8ea5}, isbn = {978-1-60558-487-4}, location = {Madrid, Spain}, pages = {781--790}, publisher = {ACM}, title = {Constructing folksonomies from user-specified relations on Flickr}, url = {http://www2009.org/proceedings/pdf/p781.pdf}, year = 2009 }

@incollection{Voelker2008, author = {V\"{o}lker, J. and Haase, P. and Hitzler, P.}, booktitle = {Ontology Learning and Population: Bridging the Gap between Text and Knowledge}, interhash = {cf638e52ae5c4f9af9d35d8faee72c16}, intrahash = {f80af45b8659db1a4327a5ce1df3f267}, owner = {blev}, publisher = {IOS Press}, series = {Frontiers in Artificial Intelligence and Applications}, timestamp = {2009.02.22}, title = {Learning Expressive Ontologies}, year = 2008 }

@inproceedings{5273871, abstract = {This paper introduces WikiOnto: a system that assists in the extraction and modeling of topic ontologies in a semi-automatic manner using a preprocessed document corpus derived from Wikipedia. Based on the Wikipedia XML Corpus, we present a three-tiered framework for extracting topic ontologies in quick time and a modeling environment to refine these ontologies. Using natural language processing (NLP) and other machine learning (ML) techniques along with a very rich document corpus, this system proposes a solution to a task that is generally considered extremely cumbersome. The initial results of the prototype suggest strong potential of the system to become highly successful in ontology extraction and modeling and also inspire further research on extracting ontologies from other semi-structured document corpora as well.}, author = {De Silva, L. and Jayaratne, L.}, booktitle = {Second International Conference on the Applications of Digital Information and Web Technologies (ICADIWT '09)}, doi = {10.1109/ICADIWT.2009.5273871}, interhash = {c1996cb9e69de56e2bb2f8e763fe0482}, intrahash = {66bec053541e521fbe68c0119806ae49}, month = {Aug.}, pages = {446--451}, title = {Semi-automatic extraction and modeling of ontologies using Wikipedia XML Corpus}, url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?isnumber=5273826&arnumber=5273871&count=156&index=116}, year = 2009 }

@article{ieKey, author = {Cimiano, Philipp and V\"{o}lker, Johanna and Studer, Rudi}, date = {October 2006}, interhash = {0007185516cf6c93931a11bc13c55f3f}, intrahash = {bd70c98a41d8cc01464dd022dfd118b6}, journal = {Information, Wissenschaft und Praxis}, number = {6-7}, pages = {315--320}, title = {Ontologies on Demand? -- A Description of the State-of-the-Art, Applications, Challenges and Trends for Ontology Learning from Text Information}, url = {http://www.aifb.uni-karlsruhe.de/Publikationen/showPublikation?publ_id=1282}, volume = 57, year = 2006 }

@article{Brewster:2009:BMC-Bioinformatics:19426458, abstract = {BACKGROUND: Ontology construction for any domain is a labour intensive and complex process. Any methodology that can reduce the cost and increase efficiency has the potential to make a major impact in the life sciences. This paper describes an experiment in ontology construction from text for the animal behaviour domain. Our objective was to see how much could be done in a simple and relatively rapid manner using a corpus of journal papers.
We used a sequence of pre-existing text processing steps, and here describe the different choices made to clean the input, to derive a set of terms and to structure those terms in a number of hierarchies. We describe some of the challenges, especially that of focusing the ontology appropriately given a starting point of a heterogeneous corpus. RESULTS: Using mainly automated techniques, we were able to construct an 18055 term ontology-like structure with 73% recall of animal behaviour terms, but a precision of only 26%. We were able to clean unwanted terms from the nascent ontology using lexico-syntactic patterns that tested the validity of term inclusion within the ontology. We used the same technique to test for subsumption relationships between the remaining terms to add structure to the initially broad and shallow structure we generated. All outputs are available at http://thirlmere.aston.ac.uk/\~kiffer/animalbehaviour/. CONCLUSION: We present a systematic method for the initial steps of ontology or structured vocabulary construction for scientific domains that requires limited human effort and can make a contribution both to ontology learning and maintenance. The method is useful both for the exploration of a scientific domain and as a stepping stone towards formally rigorous ontologies. The filtering of recognised terms from a heterogeneous corpus to focus upon those that are the topic of the ontology is identified to be one of the main challenges for research in ontology learning.}, author = {Brewster, C. and Jupp, S. and Luciano, J. and Shotton, D. and Stevens, R. D. and Zhang, Z.}, doi = {10.1186/1471-2105-10-S5-S1}, interhash = {f4b4e74631a837df6c3d102731ec46c3}, intrahash = {e9a83a729df52557d560ad98404774c3}, journal = {BMC Bioinformatics}, pmid = {19426458}, title = {Issues in learning an ontology from text}, url = {http://www.ncbi.nlm.nih.gov/pubmed/19426458}, volume = {10 Suppl 5}, year = 2009 }

@phdthesis{david2007domain, address = {Saarbrücken}, author = {Sánchez, David}, interhash = {997c9d66a7ef8844f410919ccecc4768}, intrahash = {504c0b73b391933fb0536b135144ae1d}, isbn = {9783836470698 3836470691}, publisher = {VDM Verlag Dr. Müller}, refid = {426144281}, title = {Domain ontology learning from the web: an unsupervised, automatic and domain independent approach}, url = {http://www.worldcat.org/search?qt=worldcat_org_all&q=3836470691}, year = 2007 }

@article{1179190, abstract = {Our OntoLearn system is an infrastructure for automated ontology learning from domain text. It is the only system, as far as we know, that uses natural language processing and machine learning techniques, and is part of a more general ontology engineering architecture. We describe the system and an experiment in which we used a machine-learned tourism ontology to automatically translate multiword terms from English to Italian. The method can apply to other domains without manual adaptation.}, author = {Navigli, R. and Velardi, P.
and Gangemi, A.}, doi = {10.1109/MIS.2003.1179190}, interhash = {a3f49b0b169c0e2b6ea1e3ed7e87f957}, intrahash = {d70653a1a21b9e84904def9d2fdb5151}, issn = {1541-1672}, journal = {IEEE Intelligent Systems}, month = {Jan-Feb}, number = 1, pages = {22--31}, title = {Ontology learning and its application to automated terminology translation}, url = {http://ieeexplore.ieee.org/search/wrapper.jsp?arnumber=1179190}, volume = 18, year = 2003 }

@inproceedings{666125, address = {London, UK}, author = {Brewster, Christopher and Ciravegna, Fabio and Wilks, Yorick}, booktitle = {NLDB '02: Proceedings of the 6th International Conference on Applications of Natural Language to Information Systems -- Revised Papers}, interhash = {7311591ca9cf74ff3d0817507a18f315}, intrahash = {331564c7d3891041c1024591532a45ec}, isbn = {3-540-00307-X}, pages = {203--207}, publisher = {Springer-Verlag}, title = {User-Centred Ontology Learning for Knowledge Management}, url = {http://portal.acm.org/citation.cfm?id=666125}, year = 2002 }

@inproceedings{conf/icail/LenciMPV07, author = {Lenci, Alessandro and Montemagni, Simonetta and Pirrelli, Vito and Venturi, Giulia}, booktitle = {LOAIT}, crossref = {conf/icail/2007loait}, date = {2008-06-06}, editor = {Casanovas, Pompeu and Biasiotti, Maria Angela and Francesconi, Enrico and Sagri, Maria-Teresa}, ee = {http://ceur-ws.org/Vol-321/paper07.pdf}, interhash = {5649c87c6612fc0df9031536e6fe6d55}, intrahash = {f113eb70fed0141d87672429cb27bba3}, pages = {113--129}, publisher = {CEUR-WS.org}, series = {CEUR Workshop Proceedings}, title = {NLP-based Ontology Learning from Legal Texts. A Case Study}, url = {http://dblp.uni-trier.de/db/conf/icail/loait2007.html#LenciMPV07}, volume = 321, year = 2007 }

@inproceedings{Ome01, author = {Omelayenko, Borys}, booktitle = {Proceedings of the International Workshop on Web Dynamics, held in conjunction with the 8th International Conference on Database Theory (ICDT '01), London, UK}, interhash = {011d45b904b02fdf1a65122d2832710b}, intrahash = {3edf80da8b39eefeea46379581628adf}, title = {Learning of Ontologies for the Web: the Analysis of Existent Approaches}, url = {http://www.dcs.bbk.ac.uk/webDyn/webDynPapers/omelayenko.pdf}, year = 2001 }

@article{375731, abstract = {A data-integration system provides access to a multitude of data sources through a single mediated schema. A key bottleneck in building such systems has been the laborious manual construction of semantic mappings between the source schemas and the mediated schema. We describe LSD, a system that employs and extends current machine-learning techniques to semi-automatically find such mappings. LSD first asks the user to provide the semantic mappings for a small set of data sources, then uses these mappings together with the sources to train a set of learners. Each learner exploits a different type of information either in the source schemas or in their data. Once the learners have been trained, LSD finds semantic mappings for a new data source by applying the learners, then combining their predictions using a meta-learner. To further improve matching accuracy, we extend machine learning techniques so that LSD can incorporate domain constraints as an additional source of knowledge, and develop a novel learner that utilizes the structural information in XML documents. Our approach thus is distinguished in that it incorporates multiple types of knowledge. Importantly, its architecture is extensible to additional learners that may exploit new kinds of information.
We describe a set of experiments on several real-world domains, and show that LSD proposes semantic mappings with a high degree of accuracy.}, address = {New York, NY, USA}, author = {Doan, AnHai and Domingos, Pedro and Halevy, Alon Y.}, doi = {10.1145/376284.375731}, interhash = {1550f1948858bf8b315ea2fc6ed789cd}, intrahash = {29e7660361ca79b97b00e5db51fb66ee}, issn = {0163-5808}, journal = {SIGMOD Rec.}, number = 2, pages = {509--520}, publisher = {ACM}, title = {Reconciling schemas of disparate data sources: a machine-learning approach}, url = {http://portal.acm.org/citation.cfm?id=375731}, volume = 30, year = 2001 }

@inproceedings{conf/Rudolph07, address = {Berlin, Heidelberg}, author = {Rudolph, Sebastian and Völker, Johanna and Hitzler, Pascal}, booktitle = {Proceedings of the 15th International Conference on Conceptual Structures (ICCS 2007)}, crossref = {conf/iccs/2006}, editor = {Priss, Uta and Polovina, Simon and Hill, Richard}, interhash = {95939c2e69ef57fcf65e93df6010fe60}, intrahash = {06b7dbf2f1ae4a442bb1559c499dae16}, isbn = {3-540-73680-8}, month = {July}, pages = {488--491}, publisher = {Springer-Verlag}, series = {Lecture Notes in Artificial Intelligence}, title = {Supporting Lexical Ontology Learning by Relational Exploration}, url = {http://www.aifb.uni-karlsruhe.de/WBS/phi/resources/publications/iccs07-relexp.pdf}, volume = 4604, year = 2007 }

@techreport{Gomez-Perez_OntoWeb03, author = {{G{\'o}mez-P{\'e}rez}, Asunci{\'o}n and Manzano-Macho, David}, institution = {OntoWeb Consortium}, interhash = {8ee5304684f3b0974890a7427c2438ae}, intrahash = {6b56e7f1d2b3913be8a04a09c6d566c1}, number = {1.5}, title = {A survey of ontology learning methods and techniques}, type = {Deliverable}, url = {http://www.deri.at/fileadmin/documents/deliverables/Ontoweb/D1.5.pdf}, year = 2003 }

@inproceedings{1661779, abstract = {A folksonomy refers to a collection of user-defined tags with which users describe contents published on the Web. With the flourish of Web 2.0, folksonomies have become an important means to develop the Semantic Web. Because tags in folksonomies are authored freely, there is a need to understand the structure and semantics of these tags in various applications. In this paper, we propose a learning approach to create an ontology that captures the hierarchical semantic structure of folksonomies.
Our experimental results on two different genres of real world data sets show that our method can effectively learn the ontology structure from the folksonomies.}, address = {San Francisco, CA, USA}, author = {Tang, Jie and Leung, Ho-fung and Luo, Qiong and Chen, Dewei and Gong, Jibin}, booktitle = {IJCAI '09: Proceedings of the 21st International Joint Conference on Artificial Intelligence}, interhash = {17f95a6ba585888cf45443926d8b7e98}, intrahash = {7b335f08a288a79eb70eff89f1ec7630}, location = {Pasadena, California, USA}, pages = {2089--2094}, publisher = {Morgan Kaufmann Publishers Inc.}, title = {Towards ontology learning from folksonomies}, url = {http://ijcai.org/papers09/Papers/IJCAI09-344.pdf}, year = 2009 }

@incollection{30474, author = {Tresp, Volker and Bundschus, Markus and Rettinger, Achim and Huang, Yi}, booktitle = {Uncertainty Reasoning for the Semantic Web I}, interhash = {e27fbf5b5fb16f66cd0c7a3932fc4695}, intrahash = {006468688804bc3563225b8dcd7aea97}, publisher = {Springer}, series = {Lecture Notes in Artificial Intelligence}, title = {Towards machine learning on the Semantic Web}, url = {http://wwwbrauer.informatik.tu-muenchen.de/~trespvol/papers/LearningRDF23.pdf}, year = 2008 }

@article{keyhere, abstract = {Ontology is one of the fundamental cornerstones of the semantic Web. The pervasive use of ontologies in information sharing and knowledge management calls for efficient and effective approaches to ontology development. Ontology learning, which seeks to discover ontological knowledge from various forms of data automatically or semi-automatically, can overcome the bottleneck of ontology acquisition in ontology development. Despite the significant progress in ontology learning research over the past decade, there remain a number of open problems in this field. This paper provides a comprehensive review and discussion of major issues, challenges, and opportunities in ontology learning. We propose a new learning-oriented model for ontology development and a framework for ontology learning. Moreover, we identify and discuss important dimensions for classifying ontology learning approaches and techniques. In light of the impact of domain on choosing ontology learning approaches, we summarize domain characteristics that can facilitate future ontology learning effort. The paper offers a road map and a variety of insights about this fast-growing field.}, author = {Zhou, Lina}, interhash = {78b6d3db998dcd27c475dfff3816f48f}, intrahash = {95b0f4f7c9c628e032d8bb4c69b432ed}, journal = {Information Technology and Management}, month = sep, number = 3, pages = {241--252}, title = {Ontology learning: state of the art and open issues}, url = {http://dx.doi.org/10.1007/s10799-007-0019-5}, volume = 8, year = 2007 }

@inproceedings{PuWang:2007, abstract = {The exponential growth of text documents available on the Internet has created an urgent need for accurate, fast, and general purpose text classification algorithms. However, the "bag of words" representation used for these classification methods is often unsatisfactory as it ignores relationships between important terms that do not co-occur literally. In order to deal with this problem, we integrate background knowledge - in our application: Wikipedia - into the process of classifying text documents.
The experimental evaluation on Reuters newsfeeds and several other corpora shows that our classification results with encyclopedia knowledge are much better than the baseline "bag of words" methods.}, author = {Wang, Pu and Hu, Jian and Zeng, Hua-Jun and Chen, Lijun and Chen, Zheng}, booktitle = {Seventh IEEE International Conference on Data Mining (ICDM 2007)}, doi = {10.1109/ICDM.2007.77}, interhash = {8a899b60047e20e162fc12b2ff6f8142}, intrahash = {66058efbca5abd1222f72c32365d23fa}, isbn = {978-0-7695-3018-5}, issn = {1550-4786}, pages = {332--341}, title = {Improving Text Classification by Using Encyclopedia Knowledge}, url = {ftp://ftp.computer.org/press/outgoing/proceedings/icdm07/Data/3018a332.pdf}, year = 2007 }

@inproceedings{nldb05, address = {Alicante, Spain}, author = {Cimiano, Philipp and Völker, Johanna}, booktitle = {Proceedings of the 10th International Conference on Applications of Natural Language to Information Systems (NLDB)}, editor = {Montoyo, Andr{\'e}s and Mu{\~n}oz, Rafael and M{\'e}tais, Elisabeth}, interhash = {c90cb094c9f4f3cca1214d0478ffeb07}, intrahash = {072436e5adc4f5fdc39f4baeaa55b077}, month = jun, pages = {227--238}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {Text2Onto - A Framework for Ontology Learning and Data-driven Change Discovery}, url = {http://www.aifb.uni-karlsruhe.de/WBS/jvo/publications/Text2Onto_nldb_2005.pdf}, volume = 3513, year = 2005 }