% Bibliography: measures of semantic relatedness / similarity
% (WordNet-, Wikipedia- and folksonomy-based approaches).
%
% Conventions used below:
%   - one field per line; publication fields first, bookkeeping fields
%     (file, groups, interhash, intrahash, timestamp, username) last
%   - words in titles that must keep their capitalisation are brace-protected
%   - month uses the predefined three-letter macros; doi holds the bare DOI
%   - remarks live outside the entries, because BibTeX has no comment
%     syntax inside an entry

% NOTE(review): volume = 1 / number = 1 look like placeholder values -- confirm
% against the published record.
@article{mohammad2006distributional,
  author    = {Mohammad, S. and Hirst, G.},
  title     = {Distributional Measures as Proxies for Semantic Distance: A Survey},
  journal   = {Computational Linguistics},
  volume    = 1,
  number    = 1,
  year      = 2006,
  interhash = {54ef57feee3ddc48589e4ab2f0daa090},
  intrahash = {4ae2d940655a514232e1d2ef698f1acd}
}

% Journal name corrected (was misspelled "Computational Linguists").
@article{budanitsky2006evaluating,
  author    = {Budanitsky, Alexander and Hirst, Graeme},
  title     = {Evaluating {WordNet}-based Measures of Lexical Semantic Relatedness},
  journal   = {Computational Linguistics},
  volume    = 32,
  number    = 1,
  pages     = {13--47},
  year      = 2006,
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  url       = {http://ftp.cs.toronto.edu/pub/gh/Budanitsky+Hirst-2006.pdf},
  file      = {budanitsky2006evaluating.pdf:budanitsky2006evaluating.pdf:PDF},
  groups    = {public},
  interhash = {a259f21d89bdc61a64ce11a3aea0af06},
  intrahash = {563138e890f4463f29c0324c95878129},
  timestamp = {2010-06-03 10:09:44},
  username  = {dbenz}
}

% A second entry with this same key (typed as a conference paper, carrying a
% subset of these fields) was removed: repeated keys are an error in BibTeX
% and Biber.
@inproceedings{gabrilovich2007computing,
  author    = {Gabrilovich, E. and Markovitch, S.},
  title     = {Computing Semantic Relatedness Using {Wikipedia}-based Explicit Semantic Analysis},
  booktitle = {Proceedings of the 20th International Joint Conference on Artificial Intelligence},
  pages     = {6--12},
  year      = 2007,
  url       = {http://scholar.google.de/scholar.bib?q=info:woCrRNTAsA4J:scholar.google.com/&output=citation&hl=de&as_sdt=2000&ct=citation&cd=3},
  abstract  = {Computing semantic relatedness of natural language texts requires access to vast amounts of common-sense and domain-specific world knowledge. We propose Explicit Semantic Analysis (ESA), a novel method that represents the meaning of texts in a high-dimensional space of concepts derived from Wikipedia. We use machine learning techniques to explicitly represent the meaning of any text as a weighted vector of Wikipedia-based concepts. Assessing the relatedness of texts in this space amounts to comparing the corresponding vectors using conventional metrics (e.g., cosine). Compared with the previous state of the art, using ESA results in substantial improvements in correlation of computed relatedness scores with human judgments: from r = 0.56 to 0.75 for individual words and from r = 0.60 to 0.72 for texts. Importantly, due to the use of natural concepts, the ESA model is easy to explain to human users.},
  file      = {gabrilovich2007computing.pdf:gabrilovich2007computing.pdf:PDF},
  groups    = {public},
  interhash = {5baf6af4bf58cf3926b39a12edb35e58},
  intrahash = {839a06f838f02c04a8569fd41a5da284},
  timestamp = {2010-08-16 14:11:53},
  username  = {dbenz}
}

% doi stored without the resolver prefix; word spacing in the abstract restored.
@inproceedings{cattuto2008semantica,
  author    = {Cattuto, Ciro and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  title     = {Semantic Grounding of Tag Relatedness in Social Bookmarking Systems},
  booktitle = {The Semantic Web -- ISWC 2008, Proc.\ Intl.\ Semantic Web Conference 2008},
  editor    = {Sheth, Amit P. and Staab, Steffen and Dean, Mike and Paolucci, Massimo and Maynard, Diana and Finin, Timothy W. and Thirunarayan, Krishnaprasad},
  series    = {LNAI},
  volume    = 5318,
  pages     = {615--631},
  year      = 2008,
  publisher = {Springer},
  address   = {Heidelberg},
  doi       = {10.1007/978-3-540-88564-1_39},
  url       = {http://www.kde.cs.uni-kassel.de/pub/pdf/cattuto2008semantica.pdf},
  abstract  = {Collaborative tagging systems have nowadays become important data sources for populating semantic web applications. For tasks like synonym detection and discovery of concept hierarchies, many researchers introduced measures of tag similarity. Even though most of these measures appear very natural, their design often seems to be rather ad hoc, and the underlying assumptions on the notion of similarity are not made explicit. A more systematic characterization and validation of tag similarity in terms of formal representations of knowledge is still lacking. Here we address this issue and analyze several measures of tag similarity: Each measure is computed on data from the social bookmarking system del.icio.us and a semantic grounding is provided by mapping pairs of similar tags in the folksonomy to pairs of synsets in Wordnet, where we use validated measures of semantic distance to characterize the semantic relation between the mapped tags. This exposes important features of the investigated similarity measures and indicates which ones are better suited in the context of a given semantic application.},
  file      = {cattuto2008semantica.pdf:cattuto2008semantica.pdf:PDF},
  groups    = {public},
  interhash = {b44538648cfd476d6c94e30bc6626c86},
  intrahash = {27198c985b3bdb6daab0f7e961b370a9},
  timestamp = {2009-09-14 19:12:46},
  username  = {dbenz}
}

% NOTE(review): the page range was the degenerate "641--641"; the end page
% 650 is taken from the published proceedings record -- confirm.
@inproceedings{markines2009evaluating,
  author    = {Markines, Benjamin and Cattuto, Ciro and Menczer, Filippo and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  title     = {Evaluating Similarity Measures for Emergent Semantics of Social Tagging},
  booktitle = {18th International World Wide Web Conference},
  pages     = {641--650},
  month     = apr,
  year      = 2009,
  url       = {http://www.kde.cs.uni-kassel.de/pub/pdf/markines2009evaluating.pdf},
  abstract  = {Social bookmarking systems and their emergent information structures, known as folksonomies, are increasingly important data sources for Semantic Web applications. A key question for harvesting semantics from these systems is how to extend and adapt traditional notions of similarity to folksonomies, and which measures are best suited for applications such as navigation support, semantic search, and ontology learning. Here we build an evaluation framework to compare various general folksonomy-based similarity measures derived from established information-theoretic, statistical, and practical measures. Our framework deals generally and symmetrically with users, tags, and resources. For evaluation purposes we focus on similarity among tags and resources, considering different ways to aggregate annotations across users. After comparing how tag similarity measures predict user-created tag relations, we provide an external grounding by user-validated semantic proxies based on WordNet and the Open Directory. We also investigate the issue of scalability. We find that mutual information with distributional micro-aggregation across users yields the highest accuracy, but is not scalable; per-user projection with collaborative aggregation provides the best scalable approach via incremental computations. The results are consistent across resource and tag similarity.},
  file      = {markines2009evaluating.pdf:markines2009evaluating.pdf:PDF},
  groups    = {public},
  interhash = {a266558ad4d83d536a0be2ac94b6b7df},
  intrahash = {d16e752a8295d5dad7e26b199d9f614f},
  timestamp = {2009-06-30 11:54:56},
  username  = {dbenz}
}

% Workshop paper; not the same work as cattuto2008semantica (ISWC 2008).
@inproceedings{cattuto2008semantic,
  author    = {Cattuto, Ciro and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  title     = {Semantic Analysis of Tag Similarity Measures in Collaborative Tagging Systems},
  booktitle = {Proceedings of the 3rd Workshop on Ontology Learning and Population (OLP3)},
  pages     = {39--43},
  month     = jul,
  year      = 2008,
  address   = {Patras, Greece},
  isbn      = {978-960-89282-6-8},
  note      = {ISBN 978-960-89282-6-8},
  url       = {http://www.kde.cs.uni-kassel.de/pub/pdf/cattuto2008semantic.pdf},
  homepage  = {http://olp.dfki.de/olp3/},
  abstract  = {Social bookmarking systems allow users to organise collections of resources on the Web in a collaborative fashion. The increasing popularity of these systems as well as first insights into their emergent semantics have made them relevant to disciplines like knowledge extraction and ontology learning. The problem of devising methods to measure the semantic relatedness between tags and characterizing it semantically is still largely open. Here we analyze three measures of tag relatedness: tag co-occurrence, cosine similarity of co-occurrence distributions, and FolkRank, an adaptation of the PageRank algorithm to folksonomies. Each measure is computed on tags from a large-scale dataset crawled from the social bookmarking system del.icio.us. To provide a semantic grounding of our findings, a connection to WordNet (a semantic lexicon for the English language) is established by mapping tags into synonym sets of WordNet, and applying there well-known metrics of semantic similarity. Our results clearly expose different characteristics of the selected measures of relatedness, making them applicable to different subtasks of knowledge extraction such as synonym detection or discovery of concept hierarchies.},
  file      = {cattuto2008semantic.pdf:cattuto2008semantic.pdf:PDF},
  groups    = {public},
  interhash = {cc62b733f6e0402db966d6dbf1b7711f},
  intrahash = {3b0aca61b24e4343bd80390614e3066e},
  timestamp = {2009-09-14 19:31:25},
  username  = {dbenz}
}

% The report-series string previously sat in a booktitle field, which the
% techreport type ignores; it is now split into type + institution so the
% required institution field is present and the text is actually printed.
@techreport{DRSTV09,
  author      = {Do, Q. and Roth, D. and Sammons, M. and Tu, Y. and Vydiswaran, V.},
  title       = {Robust, Light-weight Approaches to compute Lexical Similarity},
  type        = {Computer Science Research and Technical Reports},
  institution = {University of Illinois},
  year        = 2009,
  url         = {http://cogcomp.cs.illinois.edu/papers/DRSTV09.pdf},
  interhash   = {59227aff23f542ff4951297896ddcc0f},
  intrahash   = {7618a9039657d0921991355a3109fada}
}

% NOTE(review): the crossref target conf/aaai/2006 is not defined in this part
% of the file; classic BibTeX needs it to exist and to appear after this entry
% -- confirm. The trailing period was dropped from the title (styles add their
% own punctuation).
@inproceedings{strube2006wikirelate,
  author    = {Strube, Michael and Ponzetto, Simone Paolo},
  title     = {{WikiRelate!} Computing Semantic Relatedness Using {Wikipedia}},
  booktitle = {AAAI},
  crossref  = {conf/aaai/2006},
  year      = 2006,
  publisher = {AAAI Press},
  url       = {http://www.dit.unitn.it/~p2p/RelatedWork/Matching/aaai06.pdf},
  abstract  = {Wikipedia provides a knowledge base for computing word relatedness in a more structured fashion than a search engine and with more coverage than WordNet. In this work we present experiments on using Wikipedia for computing semantic relatedness and compare it to WordNet on various benchmarking datasets. Existing relatedness measures perform better using Wikipedia than a baseline given by Google counts, and we show that Wikipedia outperforms WordNet when applied to the largest available dataset designed for that purpose. The best results on this dataset are obtained by integrating Google, WordNet and Wikipedia based measures. We also show that including Wikipedia improves the performance of an NLP application processing naturally occurring texts.},
  file      = {strube2006wikirelate.pdf:strube2006wikirelate.pdf:PDF},
  groups    = {public},
  interhash = {a09d5123ab9ab8cb00b8df6f0a7f5c81},
  intrahash = {9216a46b593c3319aa23d13ca8373beb},
  timestamp = {2011-02-02 14:02:02},
  username  = {dbenz}
}