% Bibliography: similarity, distance and semantic relatedness measures.
% Layout: one field per line, fields in alphabetical order, brace-delimited values.
% DOIs are stored bare (no resolver prefix); page ranges use a double hyphen.
% The interhash/intrahash/username/groups/file fields are export metadata that
% standard styles ignore.

@inproceedings{ring2015condist,
  author    = {Ring, Markus and Otto, Florian and Becker, Martin and Niebler, Thomas and Landes, Dieter and Hotho, Andreas},
  booktitle = {{ECML PKDD} 2015},
  interhash = {c062a57a17a0910d6c27ecd664502ac1},
  intrahash = {a2f9d649f2856677e4d886a3b517404d},
  title     = {{ConDist}: A Context-Driven Categorical Distance Measure},
  year      = 2015
}

@inproceedings{conf/conll/LevyG14,
  author    = {Levy, Omer and Goldberg, Yoav},
  booktitle = {CoNLL},
  crossref  = {conf/conll/2014},
  editor    = {Morante, Roser and Yih, Wen-tau},
  ee        = {http://aclweb.org/anthology/W/W14/W14-1618.pdf},
  interhash = {680dde1fd83a8dd0d6b2619a8266516e},
  intrahash = {23bb00b6abab97ed93e74f3b5b148630},
  isbn      = {978-1-941643-02-0},
  pages     = {171--180},
  publisher = {ACL},
  title     = {Linguistic Regularities in Sparse and Explicit Word Representations},
  url       = {http://dblp.uni-trier.de/db/conf/conll/conll2014.html#LevyG14},
  year      = 2014
}

@article{mitzlaff2012relatedness,
  author    = {Mitzlaff, Folke and Stumme, Gerd},
  interhash = {31f7605431c35592afa50e7a377ce999},
  intrahash = {072c87ef744216d0245f28c5f29ce851},
  journal   = {Human Journal},
  number    = 4,
  pages     = {205--217},
  title     = {Relatedness of given names},
  volume    = 1,
  year      = 2012
}

@inproceedings{ollivier2007finding,
  abstract  = {We introduce a new method for finding nodes semantically related to a given node in a hyperlinked graph: the Green method, based on a classical Markov chain tool. It is generic, adjustment-free and easy to implement. We test it in the case of the hyperlink structure of the English version of Wikipedia, the on-line encyclopedia. We present an extensive comparative study of the performance of our method versus several other classical methods in the case of Wikipedia. The Green method is found to have both the best average results and the best robustness.},
  acmid     = {1619874},
  author    = {Ollivier, Yann and Senellart, Pierre},
  booktitle = {Proceedings of the 22nd national conference on Artificial intelligence},
  interhash = {a291b1b4e195dd09a11c8ffe329fc0e5},
  intrahash = {76e219fe6e8a257b30c6665af8b273da},
  isbn      = {978-1-57735-323-2},
  location  = {Vancouver, British Columbia, Canada},
  numpages  = {7},
  pages     = {1427--1433},
  publisher = {AAAI Press},
  title     = {Finding related pages using {Green} measures: an illustration with {Wikipedia}},
  url       = {http://dl.acm.org/citation.cfm?id=1619797.1619874},
  volume    = 2,
  year      = 2007
}

@inproceedings{milne2008effective,
  abstract  = {This paper describes a new technique for obtaining measures of semantic relatedness. Like other recent approaches, it uses Wikipedia to provide structured world knowledge about the terms of interest. Our approach is unique in that it does so using the hyperlink structure of Wikipedia rather than its category hierarchy or textual content. Evaluation with manually defined measures of semantic relatedness reveals this to be an effective compromise between the ease of computation of the former approach and the accuracy of the latter.},
  author    = {Milne, David and Witten, Ian H.},
  booktitle = {Proceeding of AAAI Workshop on Wikipedia and Artificial Intelligence: an Evolving Synergy},
  interhash = {f8b0b3ba8f4a1c20e3d5d732a221f102},
  intrahash = {f4daaa2541e89bdd1996c42f8f4b3a4b},
  month     = jul,
  pages     = {25--30},
  publisher = {AAAI Press},
  title     = {An effective, low-cost measure of semantic relatedness obtained from {Wikipedia} links},
  url       = {https://www.aaai.org/Papers/Workshops/2008/WS-08-15/WS08-15-005.pdf},
  year      = 2008
}

@article{cha2007comprehensive,
  abstract  = {Distance or similarity measures are essential to solve many pattern recognition problems such as classification, clustering, and retrieval problems. Various distance/similarity measures that are applicable to compare two probability density functions, pdf in short, are reviewed and categorized in both syntactic and semantic relationships. A correlation coefficient and a hierarchical clustering technique are adopted to reveal similarities among numerous distance/similarity measures.},
  author    = {Cha, Sung-Hyuk},
  interhash = {dfaf5e38d33eaab89f3643b242910c81},
  intrahash = {69e7c9ba92a049efa4c70f8f0bfdb4ea},
  journal   = {International Journal of Mathematical Models and Methods in Applied Sciences},
  number    = 4,
  pages     = {300--307},
  title     = {Comprehensive Survey on Distance/Similarity Measures between Probability Density Functions},
  url       = {http://www.gly.fsu.edu/~parker/geostats/Cha.pdf},
  volume    = 1,
  year      = 2007
}

@inproceedings{Strube:2006:WCS:1597348.1597414,
  abstract  = {Wikipedia provides a knowledge base for computing word relatedness in a more structured fashion than a search engine and with more coverage than WordNet. In this work we present experiments on using Wikipedia for computing semantic relatedness and compare it to WordNet on various benchmarking datasets. Existing relatedness measures perform better using Wikipedia than a baseline given by Google counts, and we show that Wikipedia outperforms WordNet when applied to the largest available dataset designed for that purpose. The best results on this dataset are obtained by integrating Google, WordNet and Wikipedia based measures. We also show that including Wikipedia improves the performance of an NLP application processing naturally occurring texts.},
  acmid     = {1597414},
  author    = {Strube, Michael and Ponzetto, Simone Paolo},
  booktitle = {Proceedings of the 21st national conference on Artificial intelligence - Volume 2},
  interhash = {a09d5123ab9ab8cb00b8df6f0a7f5c81},
  intrahash = {3ed30e1d22b977de9a4a50c929200d2d},
  isbn      = {978-1-57735-281-5},
  location  = {Boston, Massachusetts},
  numpages  = {6},
  pages     = {1419--1424},
  publisher = {AAAI Press},
  series    = {AAAI'06},
  title     = {{WikiRelate}! Computing Semantic Relatedness Using {Wikipedia}},
  url       = {http://dl.acm.org/citation.cfm?id=1597348.1597414},
  year      = 2006
}

% Keep exactly one entry under this key: a second definition of the same key
% makes BibTeX report a repeated entry.
@inproceedings{cattuto2008semantica,
  abstract  = {Collaborative tagging systems have nowadays become important data sources for populating semantic web applications. For tasks like synonym detection and discovery of concept hierarchies, many researchers introduced measures of tag similarity. Even though most of these measures appear very natural, their design often seems to be rather ad hoc, and the underlying assumptions on the notion of similarity are not made explicit. A more systematic characterization and validation of tag similarity in terms of formal representations of knowledge is still lacking. Here we address this issue and analyze several measures of tag similarity: Each measure is computed on data from the social bookmarking system del.icio.us and a semantic grounding is provided by mapping pairs of similar tags in the folksonomy to pairs of synsets in Wordnet, where we use validated measures of semantic distance to characterize the semantic relation between the mapped tags. This exposes important features of the investigated similarity measures and indicates which ones are better suited in the context of a given semantic application.},
  address   = {Heidelberg},
  author    = {Cattuto, Ciro and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  booktitle = {The Semantic Web -- ISWC 2008, Proc. Intl. Semantic Web Conference 2008},
  doi       = {10.1007/978-3-540-88564-1_39},
  editor    = {Sheth, Amit P. and Staab, Steffen and Dean, Mike and Paolucci, Massimo and Maynard, Diana and Finin, Timothy W. and Thirunarayan, Krishnaprasad},
  file      = {cattuto2008semantica.pdf:cattuto2008semantica.pdf:PDF},
  groups    = {public},
  interhash = {b44538648cfd476d6c94e30bc6626c86},
  intrahash = {27198c985b3bdb6daab0f7e961b370a9},
  pages     = {615--631},
  publisher = {Springer},
  series    = {LNAI},
  timestamp = {2009-09-14 19:12:46},
  title     = {Semantic Grounding of Tag Relatedness in Social Bookmarking Systems},
  url       = {http://www.kde.cs.uni-kassel.de/pub/pdf/cattuto2008semantica.pdf},
  username  = {dbenz},
  volume    = 5318,
  year      = 2008
}

@article{blondel2004measure,
  abstract  = {We introduce a concept of {similarity} between vertices of directed graphs. Let GA and GB be two directed graphs with, respectively, nA and nB vertices. We define an nB times nA similarity matrix S whose real entry sij expresses how similar vertex j (in GA) is to vertex i (in GB): we say that sij is their similarity score. The similarity matrix can be obtained as the limit of the normalized even iterates of Sk+1 = BSkAT + BTSkA, where A and B are adjacency matrices of the graphs and S0 is a matrix whose entries are all equal to 1. In the special case where GA = GB = G, the matrix S is square and the score sij is the similarity score between the vertices i and j of G. We point out that Kleinberg's "hub and authority" method to identify web-pages relevant to a given query can be viewed as a special case of our definition in the case where one of the graphs has two vertices and a unique directed edge between them. In analogy to Kleinberg, we show that our similarity scores are given by the components of a dominant eigenvector of a nonnegative matrix. Potential applications of our similarity concept are numerous. We illustrate an application for the automatic extraction of synonyms in a monolingual dictionary.},
  address   = {Philadelphia, PA, USA},
  author    = {Blondel, Vincent D. and Gajardo, Anah{\'\i} and Heymans, Maureen and Senellart, Pierre and Van Dooren, Paul},
  doi       = {10.1137/S0036144502415960},
  interhash = {b59d33c99477e70a646615cd0470f459},
  intrahash = {fbaef7a3057ff12e16dfd65c42fb0239},
  issn      = {0036-1445},
  journal   = {SIAM Rev.},
  number    = 4,
  pages     = {647--666},
  publisher = {Society for Industrial and Applied Mathematics},
  title     = {A Measure of Similarity between Graph Vertices: Applications to Synonym Extraction and Web Searching},
  url       = {http://portal.acm.org/citation.cfm?id=1035533.1035557},
  volume    = 46,
  year      = 2004
}

@inproceedings{agirre2009study,
  abstract  = {This paper presents and compares WordNet-based and distributional similarity approaches. The strengths and weaknesses of each approach regarding similarity and relatedness tasks are discussed, and a combination is presented. Each of our methods independently provide the best results in their class on the RG and WordSim353 datasets, and a supervised combination of them yields the best published results on all datasets. Finally, we pioneer cross-lingual similarity, showing that our methods are easily adapted for a cross-lingual task with minor losses.},
  acmid     = {1620758},
  address   = {Stroudsburg, PA, USA},
  author    = {Agirre, Eneko and Alfonseca, Enrique and Hall, Keith and Kravalova, Jana and Pa{\c{s}}ca, Marius and Soroa, Aitor},
  booktitle = {Proceedings of Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  interhash = {35326b1cfd5cde92744c22b981c84b23},
  intrahash = {e91cce045a1d323c0012027f04771733},
  isbn      = {978-1-932432-41-1},
  location  = {Boulder, Colorado},
  numpages  = {9},
  pages     = {19--27},
  publisher = {Association for Computational Linguistics},
  series    = {NAACL '09},
  title     = {A study on similarity and relatedness using distributional and {WordNet}-based approaches},
  url       = {http://portal.acm.org/citation.cfm?id=1620754.1620758},
  year      = 2009
}

% Same paper as bollegala2007measuring; both keys are kept because documents
% may cite either one.
@inproceedings{Bollegala07semanticSearch,
  address   = {New York, NY, USA},
  author    = {Bollegala, Danushka and Matsuo, Yutaka and Ishizuka, Mitsuru},
  booktitle = {WWW '07: Proceedings of the 16th international conference on World Wide Web},
  doi       = {10.1145/1242572.1242675},
  interhash = {46247eb09b5e87a6e5d4a8b2cf821ee7},
  intrahash = {c957aa2fd65df63c8c4af14b1fc827c5},
  isbn      = {978-1-59593-654-7},
  location  = {Banff, Alberta, Canada},
  pages     = {757--766},
  publisher = {ACM},
  title     = {Measuring semantic similarity between words using web search engines},
  year      = 2007
}

@article{malt1999knowing,
  author    = {Malt, B. C. and Sloman, S. A. and Gennari, S. and Shi, M. and Wang, Y.},
  interhash = {c87af87d81af41f432ec16f1e21e70f0},
  intrahash = {5d022183458db304cc8dc59bff4506d0},
  issn      = {0749-596X},
  journal   = {Journal of Memory and Language},
  pages     = {230--262},
  publisher = {Citeseer},
  title     = {Knowing versus naming: Similarity and the linguistic categorization of artifacts},
  url       = {http://scholar.google.de/scholar.bib?q=info:43NPl_inXfsJ:scholar.google.com/&output=citation&hl=de&as_sdt=2000&as_vis=1&ct=citation&cd=0},
  volume    = 40,
  year      = 1999
}

% Same paper as Bollegala07semanticSearch (see that key); this copy also
% carries the file and url fields.
@inproceedings{bollegala2007measuring,
  address   = {New York, NY, USA},
  author    = {Bollegala, Danushka and Matsuo, Yutaka and Ishizuka, Mitsuru},
  booktitle = {WWW '07: Proceedings of the 16th international conference on World Wide Web},
  doi       = {10.1145/1242572.1242675},
  file      = {:bollegala2007measuring.pdf:PDF},
  interhash = {46247eb09b5e87a6e5d4a8b2cf821ee7},
  intrahash = {c957aa2fd65df63c8c4af14b1fc827c5},
  isbn      = {978-1-59593-654-7},
  location  = {Banff, Alberta, Canada},
  pages     = {757--766},
  publisher = {ACM},
  title     = {Measuring semantic similarity between words using web search engines},
  url       = {http://www2007.org/papers/paper632.pdf},
  year      = 2007
}

@inproceedings{maedche2002measuring,
  address   = {London, UK},
  author    = {Maedche, Alexander and Staab, Steffen},
  booktitle = {EKAW '02: Proceedings of the 13th International Conference on Knowledge Engineering and Knowledge Management. Ontologies and the Semantic Web},
  file      = {:maedche2002measuring.pdf:PDF},
  interhash = {d5b06cd1af41e25a751ab755fb3a0068},
  intrahash = {3a3b029259f39e1e1893012f5e8a7b1e},
  pages     = {251--263},
  publisher = {Springer-Verlag},
  title     = {Measuring Similarity between Ontologies},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.131.5761&rep=rep1&type=pdf},
  year      = 2002
}

@inproceedings{crandall2008feedback,
  abstract  = {A fundamental open question in the analysis of social networks is to understand the interplay between similarity and social ties. People are similar to their neighbors in a social network for two distinct reasons: first, they grow to resemble their current friends due to social influence; and second, they tend to form new links to others who are already like them, a process often termed selection by sociologists. While both factors are present in everyday social processes, they are in tension: social influence can push systems toward uniformity of behavior, while selection can lead to fragmentation. As such, it is important to understand the relative effects of these forces, and this has been a challenge due to the difficulty of isolating and quantifying them in real settings.},
  address   = {New York, NY, USA},
  at        = {2009-07-01 08:09:57},
  author    = {Crandall, David and Cosley, Dan and Huttenlocher, Daniel and Kleinberg, Jon and Suri, Siddharth},
  booktitle = {KDD '08: Proceeding of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining},
  doi       = {10.1145/1401890.1401914},
  id        = {3353096},
  interhash = {07ad80d96b769ae60741f4269fccd544},
  intrahash = {64d218d536296955df9780a23d9f2aec},
  isbn      = {978-1-60558-193-4},
  location  = {Las Vegas, Nevada, USA},
  pages     = {160--168},
  priority  = {3},
  publisher = {ACM},
  title     = {Feedback effects between similarity and social influence in online communities},
  url       = {http://dx.doi.org/10.1145/1401890.1401914},
  year      = 2008
}

@inproceedings{le2007current,
  abstract  = {Ontologies are widely used and play important roles in applications related to knowledge management, artificial intelligence, natural language processing, etc. Measuring the semantic similarity between ontological concepts is necessary in applications that use ontologies. This paper presents a survey of approaches to compute ontological concept similarity. A taxonomy showing the classification of approaches is introduced. The advantages and disadvantages of each approach are discussed.},
  author    = {Le, Duy Ngan and Goh, A. E. S.},
  booktitle = {Semantics, Knowledge and Grid, Third International Conference on},
  doi       = {10.1109/SKG.2007.16},
  interhash = {abe9003dbe2bc43bef22e4249f55746a},
  intrahash = {356c507e72532460c1886974fa04d4c4},
  month     = oct,
  pages     = {266--269},
  title     = {Current Practices in Measuring Ontological Concept Similarity},
  year      = 2007
}

@inproceedings{zhao2006timedependent,
  abstract  = {It has become a promising direction to measure similarity of Web search queries by mining the increasing amount of click-through data logged by Web search engines, which record the interactions between users and the search engines. Most existing approaches employ the click-through data for similarity measure of queries with little consideration of the temporal factor, while the click-through data is often dynamic and contains rich temporal information. In this paper we present a new framework of time-dependent query semantic similarity model on exploiting the temporal characteristics of historical click-through data. The intuition is that more accurate semantic similarity values between queries can be obtained by taking into account the timestamps of the log data. With a set of user-defined calendar schema and calendar patterns, our time-dependent query similarity model is constructed using the marginalized kernel technique, which can exploit both explicit similarity and implicit semantics from the click-through data effectively. Experimental results on a large set of click-through data acquired from a commercial search engine show that our time-dependent query similarity model is more accurate than the existing approaches. Moreover, we observe that our time-dependent query similarity model can, to some extent, reflect real-world semantics such as real-world events that are happening over time.},
  address   = {New York, NY, USA},
  author    = {Zhao, Qiankun and Hoi, Steven C. H. and Liu, Tie-Yan and Bhowmick, Sourav S. and Lyu, Michael R. and Ma, Wei-Ying},
  booktitle = {WWW '06: Proceedings of the 15th international conference on World Wide Web},
  doi       = {10.1145/1135777.1135858},
  interhash = {c765e101c37f6b530e2c1c59808048d7},
  intrahash = {57cbc64550d3a1b5b8599a0783e95111},
  isbn      = {1-59593-323-9},
  location  = {Edinburgh, Scotland},
  pages     = {543--552},
  publisher = {ACM},
  title     = {Time-dependent semantic similarity measure of queries using historical click-through data},
  url       = {http://portal.acm.org/citation.cfm?id=1135777.1135858},
  year      = 2006
}

% NOTE(review): the venue and page range below were filled in from the published
% KDD 2002 record, not from the original export -- confirm against the publisher.
@inproceedings{jeh-simrank,
  author    = {Jeh, Glen and Widom, Jennifer},
  booktitle = {KDD '02: Proceedings of the Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  interhash = {a6d4531690305dc44937118df813b4b5},
  intrahash = {ba5c057884730db377909072044ee03e},
  pages     = {538--543},
  publisher = {ACM},
  title     = {{SimRank}: A measure of structural-context similarity},
  url       = {http://scholar.google.de/scholar.bib?q=info:W8wKRBpAlMsJ:scholar.google.com/&output=citation&hl=de&ct=citation&cd=0},
  year      = 2002
}

% The key uses the first author's given name; it is kept unchanged so that
% existing citations continue to resolve.
@inproceedings{christian2009withinnetwork,
  abstract  = {Within-network classification, where the goal is to classify the nodes of a partly labeled network, is a semi-supervised learning problem that has applications in several important domains like image processing, the classification of documents, and the detection of malicious activities. While most methods for this problem infer the missing labels collectively based on the hypothesis that linked or nearby nodes are likely to have the same labels, there are many types of networks for which this assumption fails, e.g., molecular graphs, trading networks, etc. In this paper, we present a collective classification method, based on relaxation labeling, that classifies entities of a network using their local structure. This method uses a marginalized similarity kernel that compares the local structure of two nodes with random walks in the network. Through experimentation on different datasets, we show our method to be more accurate than several state-of-the-art approaches for this problem.},
  author    = {Desrosiers, Christian and Karypis, George},
  booktitle = {Machine Learning and Knowledge Discovery in Databases},
  doi       = {10.1007/978-3-642-04180-8_34},
  interhash = {5db04cc3cfea4d9777a55c7c9a44f71c},
  intrahash = {fbcbbf5c016ec86fe15591e70f71b66b},
  pages     = {260--275},
  title     = {Within-Network Classification Using Local Structure Similarity},
  url       = {http://dx.doi.org/10.1007/978-3-642-04180-8_34},
  year      = 2009
}