% Bibliography on ontology learning, taxonomy induction and semantic relation
% extraction. One record per work; citation keys are unique.
% Conventions: bare DOIs in "doi" (no resolver prefix), page ranges with "--",
% accented letters in BibTeX special-character form, e.g. {\'e}.
% Fields such as ee, file, groups, interhash, intrahash, timestamp and username
% are export metadata and are ignored by standard bibliography styles.

@article{Hazman:30May2009:1744-2621:24,
  abstract  = {Ontologies play a vital role in many web- and internet-related applications. This work presents a system for accelerating the ontology building process via semi-automatically learning a hierarchal ontology given a set of domain-specific web documents and a set of seed concepts. The methods are tested with web documents in the domain of agriculture. The ontology is constructed through the use of two complementary approaches. The presented system has been used to build an ontology in the agricultural domain using a set of Arabic extension documents and evaluated against a modified version of the AGROVOC ontology.},
  author    = {Hazman, Maryam and El-Beltagy, Samhaa R. and Rafea, Ahmed},
  doi       = {10.1504/IJMSO.2009.026251},
  interhash = {fe27d687bcba91a7a6fe51eec9a2b87d},
  intrahash = {323c8bdedc8a4643232a498ac03d6407},
  journal   = {International Journal of Metadata, Semantics and Ontologies},
  pages     = {24--33},
  title     = {Ontology learning from domain specific web documents},
  url       = {http://www.ingentaconnect.com/content/ind/ijmso/2009/00000004/F0020001/art00003},
  volume    = 4,
  year      = 2009
}

@inproceedings{tesconi2008semantify,
  abstract  = {At present tagging is experimenting a great diffusion as the most adopted way to collaboratively classify resources over the Web. In this paper, after a detailed analysis of the attempts made to improve the organization and structure of tagging systems as well as the usefulness of this kind of social data, we propose and evaluate the Tag Disambiguation Algorithm, mining del.icio.us data. It allows to easily semantify the tags of the users of a tagging service: it automatically finds out for each tag the related concept of Wikipedia in order to describe Web resources through senses. On the basis of a set of evaluation tests, we analyze all the advantages of our sense-based way of tagging, proposing new methods to keep the set of users tags more consistent or to classify the tagged resources on the basis of Wikipedia categories, YAGO classes or Wordnet synsets. We discuss also how our semanitified social tagging data are strongly linked to DBPedia and the datasets of the Linked Data community.},
  author    = {Tesconi, Maurizio and Ronzano, Francesco and Marchetti, Andrea and Minutoli, Salvatore},
  booktitle = {Proceedings of the Workshop Social Data on the Web (SDoW2008)},
  crossref  = {CEUR-WS.org/Vol-405},
  file      = {tesconi2008semantify.pdf:tesconi2008semantify.pdf:PDF},
  groups    = {public},
  interhash = {0c1c96b41a0af8512c20a7d41504640f},
  intrahash = {dd698b5ee4d93496d11627cbe1615514},
  timestamp = {2009-09-27 15:57:13},
  title     = {Semantify del.icio.us: Automatically Turn your Tags into Senses},
  url       = {http://CEUR-WS.org/Vol-405/paper8.pdf},
  username  = {dbenz},
  year      = 2008
}

@inproceedings{hjelm2008multilingual,
  abstract  = {We present a system for taxonomy extraction, aimed at providing a taxonomic backbone in an ontology learning environment. We follow previous research in using hierarchical clustering based on distributional similarity of the terms in texts. We show that basing the clustering on a comparable corpus in four languages gives a considerable improvement in accuracy compared to using only the monolingual English texts. We also show that hierarchical k-means clustering increases the similarity to the original taxonomy, when compared with a bottom-up agglomerative clustering approach.},
  author    = {Hjelm, Hans and Buitelaar, Paul},
  booktitle = {ECAI},
  crossref  = {conf/ecai/2008},
  doi       = {10.3233/978-1-58603-891-5-288},
  editor    = {Ghallab, Malik and Spyropoulos, Constantine D. and Fakotakis, Nikos and Avouris, Nikolaos M.},
  ee        = {http://dx.doi.org/10.3233/978-1-58603-891-5-288},
  file      = {hjelm2008multilingual.pdf:hjelm2008multilingual.pdf:PDF},
  groups    = {public},
  interhash = {21a658154fb1a02e773b7a678b15f9f4},
  intrahash = {813903a333a40ecf9a59ded552acb323},
  isbn      = {978-1-58603-891-5},
  pages     = {288--292},
  publisher = {IOS Press},
  series    = {Frontiers in Artificial Intelligence and Applications},
  timestamp = {2011-01-18 12:06:01},
  title     = {Multilingual Evidence Improves Clustering-based Taxonomy Extraction},
  url       = {http://www.ling.su.se/staff/hans/artiklar/ecai2008-hjelm-buitelaar.pdf},
  username  = {dbenz},
  volume    = 178,
  year      = 2008
}

@inproceedings{baezayates2007extracting,
  abstract  = {In this paper we study a large query log of more than twenty million queries with the goal of extracting the semantic relations that are implicitly captured in the actions of users submitting queries and clicking answers. Previous query log analyses were mostly done with just the queries and not the actions that followed after them. We first propose a novel way to represent queries in a vector space based on a graph derived from the query-click bipartite graph. We then analyze the graph produced by our query log, showing that it is less sparse than previous results suggested, and that almost all the measures of these graphs follow power laws, shedding some light on the searching user behavior as well as on the distribution of topics that people want in the Web. The representation we introduce allows to infer interesting semantic relationships between queries. Second, we provide an experimental analysis on the quality of these relations, showing that most of them are relevant. Finally we sketch an application that detects multitopical URLs.},
  address   = {New York, NY, USA},
  author    = {Baeza-Yates, Ricardo and Tiberi, Alessandro},
  booktitle = {KDD '07: Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining},
  doi       = {10.1145/1281192.1281204},
  file      = {baezayates2007extracting.pdf:baezayates2007extracting.pdf:PDF},
  groups    = {public},
  interhash = {26ca034be705abaf072835784f53d877},
  intrahash = {6e45b65feffd1545c6dca62bf4b8f53d},
  isbn      = {978-1-59593-609-7},
  location  = {San Jose, California, USA},
  pages     = {76--85},
  publisher = {ACM},
  timestamp = {2009-06-01 15:31:03},
  title     = {Extracting semantic relations from query logs},
  url       = {http://portal.acm.org/citation.cfm?id=1281192.1281204},
  username  = {dbenz},
  year      = 2007
}

@incollection{garciasilva2008pattern,
  abstract  = {With the goal of speeding up the ontology development process, ontology engineers are starting to reuse as much as possible available ontologies and non-ontological resources such as classification schemes, thesauri, lexicons and folksonomies, that already have some degree of consensus. The reuse of such non-ontological resources necessarily involves their re-engineering into ontologies. Non-ontological resources are highly heterogeneous in their data model and contents: they encode different types of knowledge, and they can be modeled and implemented in different ways. In this paper we present (1) a typology for non-ontological resources, (2) a pattern based approach for re-engineering non-ontological resources into ontologies, and (3) a use case of the proposed approach.},
  at        = {2009-02-12 17:08:10},
  author    = {Garc{\'i}a-Silva, Andr{\'e}s and G{\'o}mez-P{\'e}rez, Asunci{\'o}n and Su{\'a}rez-Figueroa, Mari and Villaz{\'o}n-Terrazas, Boris},
  booktitle = {The Semantic Web},
  doi       = {10.1007/978-3-540-89704-0_12},
  file      = {garciasilva2008pattern.pdf:garciasilva2008pattern.pdf:PDF},
  groups    = {public},
  interhash = {f09d71443dff615a314c435df89a3d39},
  intrahash = {98096132b1eb4b4b5b0de3cec6a22de5},
  misc_id   = {4039576},
  pages     = {167--181},
  priority  = {2},
  timestamp = {2009-09-24 23:29:10},
  title     = {A Pattern Based Approach for Re-engineering Non-Ontological Resources into Ontologies},
  url       = {http://dx.doi.org/10.1007/978-3-540-89704-0\_12},
  username  = {dbenz},
  year      = 2008
}

@article{girju2006automatic,
  abstract   = {An important problem in knowledge discovery from text is the automatic extraction of semantic relations. This paper presents a supervised, semantically intensive, domain independent approach for the automatic detection of part--whole relations in text. First an algorithm is described that identifies lexico-syntactic patterns that encode part--whole relations. A difficulty is that these patterns also encode other semantic relations, and a learning method is necessary to discriminate whether or not a pattern contains a part--whole relation. A large set of training examples have been annotated and fed into a specialized learning system that learns classification rules. The rules are learned through an iterative semantic specialization (ISS) method applied to noun phrase constituents. Classification rules have been generated this way for different patterns such as genitives, noun compounds, and noun phrases containing prepositional phrases to extract part--whole relations from them. The applicability of these rules has been tested on a test corpus obtaining an overall average precision of 80.95\% and recall of 75.91\%. The results demonstrate the importance of word sense disambiguation for this task. They also demonstrate that different lexico-syntactic patterns encode different semantic information and should be treated separately in the sense that different clarification rules apply to different patterns.},
  author     = {Girju, Roxana and Badulescu, Adriana and Moldovan, Dan I.},
  doi        = {10.1162/coli.2006.32.1.83},
  ee         = {http://dx.doi.org/10.1162/coli.2006.32.1.83},
  file       = {girju2006automatic.pdf:girju2006automatic.pdf:PDF},
  groups     = {public},
  interhash  = {e3b517e5895171e35375ce08d632d738},
  intrahash  = {ce346613f91431251a6fe867f4360378},
  journal    = {Computational Linguistics},
  journalpub = {1},
  number     = 1,
  pages      = {83--135},
  timestamp  = {2010-10-25 15:08:53},
  title      = {Automatic Discovery of Part-Whole Relations},
  url        = {http://dblp.uni-trier.de/db/journals/coling/coling32.html#GirjuBM06},
  username   = {dbenz},
  volume     = 32,
  year       = 2006
}

@inproceedings{snow2006semantic,
  abstract  = {We propose a novel algorithm for inducing semantic taxonomies. Previous algorithms for taxonomy induction have typically focused on independent classifiers for discovering new single relationships based on hand-constructed or automatically discovered textual patterns. By contrast, our algorithm flexibly incorporates evidence from multiple classifiers over heterogenous relationships to optimize the entire structure of the taxonomy, using knowledge of a word's coordinate terms to help in determining its hypernyms, and vice versa. We apply our algorithm on the problem of sense-disambiguated noun hyponym acquisition, where we combine the predictions of hypernym and coordinate term classifiers with the knowledge in a preexisting semantic taxonomy (WordNet 2.1). We add 10,000 novel synsets to WordNet 2.1 at 84\% precision, a relative error reduction of 70\% over a non-joint algorithm using the same component classifiers. Finally, we show that a taxonomy built using our algorithm shows a 23\% relative F-score improvement over WordNet 2.1 on an independent testset of hypernym pairs.},
  author    = {Snow, Rion and Jurafsky, Daniel and Ng, Andrew Y.},
  booktitle = {ACL},
  crossref  = {conf/acl/2006},
  ee        = {http://acl.ldc.upenn.edu/P/P06/P06-1101.pdf},
  file      = {snow2006semantic.pdf:snow2006semantic.pdf:PDF},
  groups    = {public},
  interhash = {c0f5a3a22faa8dc4b61c9a717a6c9037},
  intrahash = {8f39e7ac43a97719c5a746da02dbd964},
  publisher = {The Association for Computer Linguistics},
  timestamp = {2010-10-25 15:06:10},
  title     = {Semantic Taxonomy Induction from Heterogenous Evidence},
  url       = {http://dblp.uni-trier.de/db/conf/acl/acl2006.html#SnowJN06},
  username  = {dbenz},
  year      = 2006
}

@article{zhou2007ontology,
  abstract   = {Ontology is one of the fundamental cornerstones of the semantic Web. The pervasive use of ontologies in information sharing and knowledge management calls for efficient and effective approaches to ontology development. Ontology learning, which seeks to discover ontological knowledge from various forms of data automatically or semi-automatically, can overcome the bottleneck of ontology acquisition in ontology development. Despite the significant progress in ontology learning research over the past decade, there remain a number of open problems in this field. This paper provides a comprehensive review and discussion of major issues, challenges, and opportunities in ontology learning. We propose a new learning-oriented model for ontology development and a framework for ontology learning. Moreover, we identify and discuss important dimensions for classifying ontology learning approaches and techniques. In light of the impact of domain on choosing ontology learning approaches, we summarize domain characteristics that can facilitate future ontology learning effort. The paper offers a road map and a variety of insights about this fast-growing field.},
  at         = {2009-02-13 15:22:56},
  author     = {Zhou, Lina},
  doi        = {10.1007/s10799-007-0019-5},
  ee         = {http://dx.doi.org/10.1007/s10799-007-0019-5},
  file       = {zhou2007ontology.pdf:zhou2007ontology.pdf:PDF},
  groups     = {public},
  interhash  = {78b6d3db998dcd27c475dfff3816f48f},
  intrahash  = {95b0f4f7c9c628e032d8bb4c69b432ed},
  journal    = {Information Technology and Management},
  journalpub = {1},
  misc_id    = {1719627},
  number     = 3,
  pages      = {241--252},
  priority   = {3},
  timestamp  = {2010-06-01 16:18:37},
  title      = {Ontology learning: state of the art and open issues},
  url        = {http://www.springerlink.com/content/j4g22112l7k00833/},
  username   = {dbenz},
  volume     = 8,
  year       = 2007
}

@article{zhou2008hierarchical,
  abstract   = {This paper proposes a novel tree kernel-based method with rich syntactic and semantic information for the extraction of semantic relations between named entities. With a parse tree and an entity pair, we first construct a rich semantic relation tree structure to integrate both syntactic and semantic information. And then we propose a context-sensitive convolution tree kernel, which enumerates both context-free and context-sensitive sub-trees by considering the paths of their ancestor nodes as their contexts to capture structural information in the tree structure. An evaluation on the Automatic Content Extraction/Relation Detection and Characterization (ACE RDC) corpora shows that the proposed tree kernel-based method outperforms other state-of-the-art methods.},
  address    = {Tarrytown, NY, USA},
  author     = {Zhou, GuoDong and Zhang, Min and Ji, DongHong and Zhu, QiaoMing},
  doi        = {10.1016/j.ipm.2007.07.007},
  file       = {zhou2008hierarchical.pdf:zhou2008hierarchical.pdf:PDF},
  groups     = {public},
  interhash  = {e5e2d51cf1f3a6d5efc3bd25c40602c8},
  intrahash  = {b7eb173bc2c3dd1311a24ae9a96e5c2c},
  issn       = {0306-4573},
  journal    = {Information Processing \& Management},
  journalpub = {1},
  number     = 3,
  pages      = {1008--1021},
  publisher  = {Pergamon Press, Inc.},
  timestamp  = {2010-06-10 10:51:05},
  title      = {Hierarchical learning strategy in semantic relation extraction},
  url        = {http://nlp.suda.edu.cn/~gdzhou/publication/zhougd2010_INS_ContextSensitiveTreeKernelforRelationExtraction.pdf},
  username   = {dbenz},
  volume     = 44,
  year       = 2008
}

% NOTE: a second, abbreviated record with the key tesconi2008semantify
% (same work, intrahash 348a962fe13e0b605ffc53d592464c24) used to follow here.
% It was dropped because BibTeX rejects repeated keys; the complete record
% for that key is kept earlier in this file.