% Journal article (SIAM Review, vol. 46, no. 4, 2004): a similarity measure
% between vertices of directed graphs, with applications to synonym
% extraction and web searching.
% interhash/intrahash are non-standard fields that standard styles ignore;
% they are carried over unchanged.
@article{blondel2004measure,
  author    = {Blondel, Vincent D. and Gajardo, Anah{\'\i} and Heymans, Maureen and Senellart, Pierre and Van Dooren, Paul},
  title     = {A Measure of Similarity between Graph Vertices: Applications to Synonym Extraction and Web Searching},
  journal   = {SIAM Review},
  volume    = {46},
  number    = {4},
  pages     = {647--666},
  year      = {2004},
  publisher = {Society for Industrial and Applied Mathematics},
  address   = {Philadelphia, PA, USA},
  issn      = {0036-1445},
  doi       = {10.1137/S0036144502415960},
  url       = {http://portal.acm.org/citation.cfm?id=1035533.1035557},
  abstract  = {We introduce a concept of {similarity} between vertices of directed graphs. Let $G_A$ and $G_B$ be two directed graphs with, respectively, $n_A$ and $n_B$ vertices. We define an $n_B \times n_A$ similarity matrix $S$ whose real entry $s_{ij}$ expresses how similar vertex $j$ (in $G_A$) is to vertex $i$ (in $G_B$): we say that $s_{ij}$ is their similarity score. The similarity matrix can be obtained as the limit of the normalized even iterates of $S_{k+1} = B S_k A^T + B^T S_k A$, where $A$ and $B$ are adjacency matrices of the graphs and $S_0$ is a matrix whose entries are all equal to 1. In the special case where $G_A = G_B = G$, the matrix $S$ is square and the score $s_{ij}$ is the similarity score between the vertices $i$ and $j$ of $G$. We point out that Kleinberg's ``hub and authority'' method to identify web-pages relevant to a given query can be viewed as a special case of our definition in the case where one of the graphs has two vertices and a unique directed edge between them. In analogy to Kleinberg, we show that our similarity scores are given by the components of a dominant eigenvector of a nonnegative matrix. Potential applications of our similarity concept are numerous. We illustrate an application for the automatic extraction of synonyms in a monolingual dictionary.},
  interhash = {b59d33c99477e70a646615cd0470f459},
  intrahash = {fbaef7a3057ff12e16dfd65c42fb0239},
}

% Conference paper (WSDM 2010): an evaluation of tagging systems for
% organizing a collection, using library data.
% NOTE(review): publisher is recorded as Stanford University, which matches
% the ilpubs.stanford.edu URL host; confirm whether the proceedings publisher
% should be listed here instead.
@inproceedings{ilprints956,
  author    = {Heymann, Paul and Paepcke, Andreas and Garcia-Molina, Hector},
  title     = {Tagging Human Knowledge},
  booktitle = {Third ACM International Conference on Web Search and Data Mining (WSDM2010)},
  pages     = {1--10},
  month     = feb,
  year      = {2010},
  publisher = {Stanford University},
  url       = {http://ilpubs.stanford.edu:8090/956/},
  abstract  = {A fundamental premise of tagging systems is that regular users can organize large collections for browsing and other tasks using uncontrolled vocabularies. Until now, that premise has remained relatively unexamined. Using library data, we test the tagging approach to organizing a collection. We find that tagging systems have three major large scale organizational features: consistency, quality, and completeness. In addition to testing these features, we present results suggesting that users produce tags similar to the topics designed by experts, that paid tagging can effectively supplement tags in a tagging system, and that information integration may be possible across tagging systems.},
  interhash = {d4f72ed57e6b99dbe32e18e218d81ef5},
  intrahash = {6aaa478f3700f435ec3a1c1cba0046da},
}