@inproceedings{medelyan2008integrating,
  abstract  = {Integration of ontologies begins with establishing mappings between their concept entries. We map categories from the largest manually-built ontology, Cyc, onto Wikipedia articles describing corresponding concepts. Our method draws both on Wikipedia's rich but chaotic hyperlink structure and Cyc's carefully defined taxonomic and common-sense knowledge. On 9,333 manual alignments by one person, we achieve an F-measure of 90\%; on 100 alignments by six human subjects the average agreement of the method with the subject is close to their agreement with each other. We cover 62.8\% of Cyc categories relating to common-sense knowledge and discuss what further information might be added to Cyc given this substantial new alignment.},
  author    = {Medelyan, O. and Legg, C.},
  booktitle = {Proceedings of the WIKI-AI: Wikipedia and AI Workshop at the AAAI},
  file      = {medelyan2008integrating.pdf:medelyan2008integrating.pdf:PDF},
  groups    = {public},
  interhash = {c279a921a5ac878ca952a4683ce9ac7a},
  intrahash = {245629fc15b53a08a24df90f086e7b25},
  timestamp = {2010-11-10 11:57:58},
  title     = {Integrating {Cyc} and {Wikipedia}: Folksonomy Meets Rigorously Defined Common-Sense},
  username  = {dbenz},
  volume    = 8,
  year      = 2008
}

@incollection{haridas2009exploring,
  abstract    = {The outgrowth of social networks in the recent years has resulted in opportunities for interesting data mining problems, such as interest or friendship recommendations. A global ontology over the interests specified by the users of a social network is essential for accurate recommendations. We propose, evaluate and compare three approaches to engineering a hierarchical ontology over user interests. The proposed approaches make use of two popular knowledge bases, Wikipedia and Directory Mozilla, to extract interest definitions and/or relationships between interests. More precisely, the first approach uses Wikipedia to find interest definitions, the latent semantic analysis technique to measure the similarity between interests based on their definitions, and an agglomerative clustering algorithm to group similar interests into higher level concepts. The second approach uses the Wikipedia Category Graph to extract relationships between interests, while the third approach uses Directory Mozilla to extract relationships between interests. Our results show that the third approach, although the simplest, is the most effective for building a hierarchy over user interests.},
  address     = {Berlin / Heidelberg},
  affiliation = {Kansas State University Nichols Hall Manhattan KS 66502},
  author      = {Haridas, Mandar and Caragea, Doina},
  booktitle   = {On the Move to Meaningful Internet Systems: OTM 2009},
  doi         = {10.1007/978-3-642-05151-7_35},
  editor      = {Meersman, Robert and Dillon, Tharam and Herrero, Pilar},
  file        = {haridas2009exploring.pdf:haridas2009exploring.pdf:PDF},
  groups      = {public},
  interhash   = {2363d3cb1430a4b279692e1ff3413809},
  intrahash   = {982538ff1fd44d2c3296b700eac859b3},
  pages       = {1238--1245},
  publisher   = {Springer},
  series      = {Lecture Notes in Computer Science},
  timestamp   = {2010-10-18 15:53:06},
  title       = {Exploring {Wikipedia} and {DMoz} as Knowledge Bases for Engineering a User Interests Hierarchy for Social Network Applications},
  url         = {http://dx.doi.org/10.1007/978-3-642-05151-7_35},
  username    = {dbenz},
  volume      = 5871,
  year        = 2009
}

@inproceedings{auer2007what,
  abstract  = {Wikis are established means for the collaborative authoring, versioning and publishing of textual articles. The Wikipedia project, for example, succeeded in creating the by far largest encyclopedia just on the basis of a wiki. Recently, several approaches have been proposed on how to extend wikis to allow the creation of structured and semantically enriched content. However, the means for creating semantically enriched structured content are already available and are, although unconsciously, even used by Wikipedia authors. In this article, we present a method for revealing this structured content by extracting information from template instances. We suggest ways to efficiently query the vast amount of extracted information (e.g. more than 8 million RDF statements for the English Wikipedia version alone), leading to astonishing query answering possibilities (such as for the title question). We analyze the quality of the extracted content, and propose strategies for quality improvements with just minor modifications of the wiki systems being currently used.},
  author    = {Auer, S{\"o}ren and Lehmann, Jens},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  booktitle = {ESWC},
  crossref  = {DBLP:conf/esws/2007},
  ee        = {http://dx.doi.org/10.1007/978-3-540-72667-8_36},
  file      = {auer2007what.pdf:auer2007what.pdf:PDF},
  groups    = {public},
  interhash = {2b70ab546da1b45f5350d3ff742c4288},
  intrahash = {b8e464b4a672530bf91c9189f17cca73},
  pages     = {503--517},
  timestamp = {2010-02-23 14:49:49},
  title     = {What Have {Innsbruck} and {Leipzig} in Common? Extracting Semantics from Wiki Content},
  url       = {http://www.springerlink.com/content/3131t21p634191n2/},
  username  = {dbenz},
  year      = 2007
}

@inproceedings{nazir2008extraction,
  abstract     = {Social aspects are critical in the decision making process for social actors (human beings). Social aspects can be categorized into social interaction, social communities, social groups or any kind of behavior that emerges from interlinking, overlapping or similarities between interests of a society. These social aspects are dynamic and emergent. Therefore, interlinking them in a social structure, based on bipartite affiliation network, may result in isolated graphs. The major reason is that as these correspondences are dynamic and emergent, they should be coupled with more than a single affiliation in order to sustain the interconnections during interest evolutions. In this paper we propose to interlink actors using multiple tripartite graphs rather than a bipartite graph which was the focus of most of the previous social network building techniques. The utmost benefit of using tripartite graphs is that we can have multiple and hierarchical links between social actors. Therefore in this paper we discuss the extraction, plotting and analysis methods of tripartite relations between authors, articles and categories from Wikipedia. Furthermore, we also discuss the advantages of tripartite relationships over bipartite relationships. As a conclusion of this study we argue based on our results that to build useful, robust and dynamic social networks, actors should be interlinked in one or more tripartite networks.},
  author       = {Nazir, F. and Takeda, H.},
  booktitle    = {IEEE International Symposium on Technology and Society},
  doi          = {10.1109/ISTAS.2008.4559785},
  file         = {nazir2008extraction.pdf:nazir2008extraction.pdf:PDF},
  groups       = {public},
  interhash    = {7d3cb02c1c7774fe43e4303f0d3c37a4},
  intrahash    = {c3cca9801ab1e6d2598be1041c19618c},
  isbn         = {978-1-4244-1669-1},
  month        = jun,
  organization = {IEEE},
  pages        = {1--13},
  timestamp    = {2010-02-04 14:24:37},
  title        = {Extraction and Analysis of Tripartite Relationships from {Wikipedia}},
  url          = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=4559785},
  username     = {dbenz},
  year         = 2008
}

@inproceedings{ponzetto2007deriving,
  abstract  = {We take the category system in Wikipedia as a conceptual network. We label the semantic relations between categories using methods based on connectivity in the network and lexicosyntactic matching. As a result we are able to derive a large scale taxonomy containing a large amount of subsumption, i.e. isa, relations. We evaluate the quality of the created resource by comparing it with ResearchCyc, one of the largest manually annotated ontologies, as well as computing semantic similarity between words in benchmarking datasets.},
  author    = {Ponzetto, Simone Paolo and Strube, Michael},
  booktitle = {AAAI},
  crossref  = {conf/aaai/2007},
  date      = {2007-09-05},
  file      = {ponzetto2007deriving.pdf:ponzetto2007deriving.pdf:PDF},
  groups    = {public},
  interhash = {bc3a144ed8d3f2941359ae97a5b93194},
  intrahash = {5db72406c5681facd7ad47895937d86e},
  isbn      = {978-1-57735-323-2},
  pages     = {1440--1445},
  publisher = {AAAI Press},
  timestamp = {2010-03-30 16:07:36},
  title     = {Deriving a Large-Scale Taxonomy from {Wikipedia}},
  url       = {http://dblp.uni-trier.de/db/conf/aaai/aaai2007.html#PonzettoS07},
  username  = {dbenz},
  year      = 2007
}

@inproceedings{silva2009semiautomatic,
  abstract  = {This paper introduces WikiOnto: a system that assists in the extraction and modeling of topic ontologies in a semi-automatic manner using a preprocessed document corpus derived from Wikipedia. Based on the Wikipedia XML Corpus, we present a three-tiered framework for extracting topic ontologies in quick time and a modeling environment to refine these ontologies. Using natural language processing (NLP) and other machine learning (ML) techniques along with a very rich document corpus, this system proposes a solution to a task that is generally considered extremely cumbersome. The initial results of the prototype suggest strong potential of the system to become highly successful in ontology extraction and modeling and also inspire further research on extracting ontologies from other semi-structured document corpora as well.},
  author    = {Silva, L. De and Jayaratne, L.},
  booktitle = {Second International Conference on the Applications of Digital Information and Web Technologies (ICADIWT '09)},
  doi       = {10.1109/ICADIWT.2009.5273871},
  file      = {silva2009semiautomatic.pdf:silva2009semiautomatic.pdf:PDF},
  groups    = {public},
  interhash = {c1996cb9e69de56e2bb2f8e763fe0482},
  intrahash = {66bec053541e521fbe68c0119806ae49},
  month     = aug,
  pages     = {446--451},
  timestamp = {2010-02-23 12:54:40},
  title     = {Semi-Automatic Extraction and Modeling of Ontologies Using {Wikipedia} {XML} Corpus},
  url       = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?isnumber=5273826&arnumber=5273871&count=156&index=116},
  username  = {dbenz},
  year      = 2009
}

@inproceedings{strube2006wikirelate,
  abstract  = {Wikipedia provides a knowledge base for computing word relatedness in a more structured fashion than a search engine and with more coverage than WordNet. In this work we present experiments on using Wikipedia for computing semantic relatedness and compare it to WordNet on various benchmarking datasets. Existing relatedness measures perform better using Wikipedia than a baseline given by Google counts, and we show that Wikipedia outperforms WordNet when applied to the largest available dataset designed for that purpose. The best results on this dataset are obtained by integrating Google, WordNet and Wikipedia based measures. We also show that including Wikipedia improves the performance of an NLP application processing naturally occurring texts.},
  author    = {Strube, Michael and Ponzetto, Simone Paolo},
  booktitle = {AAAI},
  crossref  = {conf/aaai/2006},
  file      = {strube2006wikirelate.pdf:strube2006wikirelate.pdf:PDF},
  groups    = {public},
  interhash = {a09d5123ab9ab8cb00b8df6f0a7f5c81},
  intrahash = {9216a46b593c3319aa23d13ca8373beb},
  publisher = {AAAI Press},
  timestamp = {2011-02-02 14:02:02},
  title     = {{WikiRelate!} Computing Semantic Relatedness Using {Wikipedia}},
  url       = {http://www.dit.unitn.it/~p2p/RelatedWork/Matching/aaai06.pdf},
  username  = {dbenz},
  year      = 2006
}

@inproceedings{grineva2008harnessing,
  abstract  = {The quality of the current tagging services can be greatly improved if the service is able to cluster tags by their meaning. Tag clouds clustered by higher level topics enable the users to explore their tag space, which is especially needed when tag clouds become large. We demonstrate TagCluster - a tool for automated tag clustering that harnesses knowledge from Wikipedia about semantic relatedness between tags and names of categories to achieve smart clustering. Our approach shows much better quality of clusters compared to the existing techniques that rely on tag co-occurrence analysis in the tagging service.},
  author    = {Grineva, Maria and Grinev, Maxim and Turdakov, Denis and Velikhov, Pavel},
  booktitle = {Proceedings of the International Workshop on Knowledge Acquisition from the Social Web (KASW2008)},
  file      = {grineva2008harnessing.pdf:grineva2008harnessing.pdf:PDF},
  groups    = {public},
  interhash = {814ebc26a00c8facc9d2a7ef3edd256e},
  intrahash = {093e8262f1cf4f2c4a159b5d7b76ce78},
  timestamp = {2011-02-02 14:57:13},
  title     = {Harnessing {Wikipedia} for Smart Tags Clustering},
  username  = {dbenz},
  year      = 2008
}

@incollection{ruizcasado2005automatic,
  abstract    = {This paper describes an automatic approach to identify lexical patterns which represent semantic relationships between concepts, from an on-line encyclopedia. Next, these patterns can be applied to extend existing ontologies or semantic networks with new relations. The experiments have been performed with the Simple English Wikipedia and WordNet 1.7. A new algorithm has been devised for automatically generalising the lexical patterns found in the encyclopedia entries. We have found general patterns for the hyperonymy, hyponymy, holonymy and meronymy relations and, using them, we have extracted more than 1200 new relationships that did not appear in WordNet originally. The precision of these relationships ranges between 0.61 and 0.69, depending on the relation.},
  address     = {Berlin / Heidelberg},
  affiliation = {Computer Science Dep., Universidad Autonoma de Madrid, 28049 Madrid Spain},
  author      = {Ruiz-Casado, Maria and Alfonseca, Enrique and Castells, Pablo},
  booktitle   = {Natural Language Processing and Information Systems},
  doi         = {10.1007/11428817_7},
  editor      = {Montoyo, Andr{\'e}s and Mu{\~n}oz, Rafael and M{\'e}tais, Elisabeth},
  file        = {ruizcasado2005automatic.pdf:ruizcasado2005automatic.pdf:PDF},
  groups      = {public},
  interhash   = {a05c644f18f451dc2bac7c4c97f63ccd},
  intrahash   = {53d9a5edc19dbc8b20705768b2518fd2},
  pages       = {233--242},
  publisher   = {Springer},
  series      = {Lecture Notes in Computer Science},
  timestamp   = {2011-02-02 15:55:29},
  title       = {Automatic Extraction of Semantic Relationships for {WordNet} by Means of Pattern Learning from {Wikipedia}},
  url         = {http://dx.doi.org/10.1007/11428817_7},
  username    = {dbenz},
  volume      = 3513,
  year        = 2005
}