% Bibliography: PageRank, link-based ranking, citation analysis and folksonomy search.
% Conventions used below:
%   - one field per line; page ranges use a double hyphen
%   - doi holds the bare DOI (no resolver prefix)
%   - month uses the standard three-letter macros
%   - non-ASCII letters are written as LaTeX escapes so classic BibTeX can read the file
%   - interhash / intrahash / vgwort / ee / bibsource are export bookkeeping fields kept verbatim

article{liu2012fulltext-layout-example-disabled} is not an entry; real entries start below.

@article{liu2012fulltext,
  author    = {Liu, Xiaozhong and Zhang, Jinsong and Guo, Chun},
  title     = {Full-Text Citation Analysis: A New Method to Enhance Scholarly Network},
  journal   = {Journal of the American Society for Information Science and Technology},
  year      = 2012,
  url       = {http://discern.uits.iu.edu:8790/publication/Full%20text%20citation.pdf},
  interhash = {011df26355ad51a88947017fd2791a98},
  intrahash = {f9c6133bf4503003822f99860f864698}
}

@techreport{ilprints750,
  author      = {Haveliwala, Taher H.},
  title       = {Topic-Sensitive {PageRank}: A Context-Sensitive Ranking Algorithm for {Web} Search},
  institution = {Stanford InfoLab},
  type        = {Technical Report},
  number      = {2003-29},
  year        = 2003,
  note        = {Extended version of the WWW2002 paper on Topic-Sensitive PageRank.},
  journal     = {IEEE Transactions on Knowledge and Data Engineering},
  publisher   = {Stanford InfoLab},
  url         = {http://ilpubs.stanford.edu:8090/750/},
  abstract    = {The original PageRank algorithm for improving the ranking of search-query results computes a single vector, using the link structure of the Web, to capture the relative ``importance'' of Web pages, independent of any particular search query. To yield more accurate search results, we propose computing a \emph{set} of PageRank vectors, biased using a set of representative topics, to capture more accurately the notion of importance with respect to a particular topic. For ordinary keyword search queries, we compute the topic-sensitive PageRank scores for pages satisfying the query using the topic of the query keywords. For searches done in context (e.g., when the search query is performed by highlighting words in a Web page), we compute the topic-sensitive PageRank scores using the topic of the context in which the query appeared. By using linear combinations of these (precomputed) biased PageRank vectors to generate context-specific importance scores for pages at query time, we show that we can generate more accurate rankings than with a single, generic PageRank vector.},
  interhash   = {959ab9af6c35acb5d8513fa032620ba7},
  intrahash   = {34aedd24fc7a45f189be1ca70dfd99e2}
}

@article{Berkhin05asurvey,
  author    = {Berkhin, Pavel},
  title     = {A Survey on {PageRank} Computing},
  journal   = {Internet Mathematics},
  volume    = 2,
  pages     = {73--120},
  year      = 2005,
  url       = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.102.2294},
  abstract  = {This survey reviews the research related to PageRank computing. Components of a PageRank vector serve as authority weights for web pages independent of their textual content, solely based on the hyperlink structure of the web. PageRank is typically used as a web search ranking component. This defines the importance of the model and the data structures that underlie PageRank processing. Computing even a single PageRank is a difficult computational task. Computing many PageRanks is a much more complex challenge. Recently, significant effort has been invested in building sets of personalized PageRank vectors. PageRank is also used in many diverse applications other than ranking. We are interested in the theoretical foundations of the PageRank formulation, in the acceleration of PageRank computing, in the effects of particular aspects of web graph structure on the optimal organization of computations, and in PageRank stability. We also review alternative models that lead to authority indices similar to PageRank and the role of such indices in applications other than web search. We also discuss link-based search personalization and outline some aspects of PageRank infrastructure from associated measures of convergence to link preprocessing.},
  interhash = {a0b85e8e85f88c262934f5fdd05525af},
  intrahash = {50de350b2ae298909eef39a11d0f682c}
}

@article{springerlink:10.1007/s00778-010-0204-8,
  author      = {Chakrabarti, Soumen and Pathak, Amit and Gupta, Manish},
  title       = {Index Design and Query Processing for Graph Conductance Search},
  journal     = {The VLDB Journal},
  pages       = {1--26},
  year        = 2010,
  publisher   = {Springer},
  address     = {Berlin / Heidelberg},
  issn        = {1066-8888},
  doi         = {10.1007/s00778-010-0204-8},
  url         = {http://dx.doi.org/10.1007/s00778-010-0204-8},
  affiliation = {IIT Bombay, Powai, Mumbai, Maharashtra India},
  keyword     = {Computer Science},
  abstract    = {Graph conductance queries, also known as personalized PageRank and related to random walks with restarts, were originally proposed to assign a hyperlink-based prestige score to Web pages. More general forms of such queries are also very useful for ranking in entity-relation (ER) graphs used to represent relational, XML and hypertext data. Evaluation of PageRank usually involves a global eigen computation. If the graph is even moderately large, interactive response times may not be possible. Recently, the need for interactive PageRank evaluation has increased. The graph may be fully known only when the query is submitted. Browsing actions of the user may change some inputs to the PageRank computation dynamically. In this paper, we describe a system that analyzes query workloads and the ER graph, invests in limited offline indexing, and exploits those indices to achieve essentially constant-time query processing, even as the graph size scales. Our techniques---data and query statistics collection, index selection and materialization, and query-time index exploitation---have parallels in the extensive relational query optimization literature, but is applied to supporting novel graph data repositories. We report on experiments with five temporal snapshots of the CiteSeer ER graph having 74--702 thousand entity nodes, 0.17--1.16 million word nodes, 0.29--3.26 million edges between entities, and 3.29--32.8 million edges between words and entities. We also used two million actual queries from CiteSeer's logs. Queries run 3--4 orders of magnitude faster than whole-graph PageRank, the gap growing with graph size. Index size is smaller than a text index. Ranking accuracy is 94--98\% with reference to whole-graph PageRank.},
  interhash   = {96a2d92f703a13f77bae8f56372f3e1b},
  intrahash   = {dcc951cd461fe1c454db7a738429d421}
}

@article{brin98anatomy,
  author    = {Brin, Sergey and Page, Lawrence},
  title     = {The {Anatomy} of a {Large-Scale} {Hypertextual} {Web} {Search} {Engine}},
  journal   = {Computer Networks and ISDN Systems},
  volume    = 30,
  number    = {1--7},
  pages     = {107--117},
  month     = apr,
  year      = 1998,
  doi       = {10.1016/S0169-7552(98)00110-X},
  interhash = {1234ad3633d435ef79d8a7f36dafa0a9},
  intrahash = {fc936cec60b1b7ab69f230f14139e8ab}
}

@misc{Maslov2009,
  author        = {Maslov, Sergei and Redner, S.},
  title         = {Promise and Pitfalls of Extending {Google's} {PageRank} Algorithm to Citation Networks},
  year          = 2009,
  eprint        = {0901.2640},
  archiveprefix = {arXiv},
  note          = {3 pages, 1 figure, invited comment for the Journal of Neuroscience. The arXiv version is microscopically different from the published version},
  url           = {http://arxiv.org/abs/0901.2640},
  abstract      = {We review our recent work on applying the Google PageRank algorithm to find scientific ``gems'' among all Physical Review publications, and its extension to CiteRank, to find currently popular research directions. These metrics provide a meaningful extension to traditionally-used importance measures, such as the number of citations and journal impact factor. We also point out some pitfalls of over-relying on quantitative metrics to evaluate scientific quality.},
  interhash     = {8f0a3a222a5c357e4db423ec065065da},
  intrahash     = {d2b34ecaa23078ebef7a7ee84be509a4}
}

@inproceedings{jaschke07recommender,
  author    = {J{\"a}schke, Robert and Marinho, Leandro Balby and Hotho, Andreas and Schmidt-Thieme, Lars and Stumme, Gerd},
  title     = {Tag Recommendations in Folksonomies},
  booktitle = {Knowledge Discovery in Databases: PKDD 2007, 11th European Conference on Principles and Practice of Knowledge Discovery in Databases, Warsaw, Poland, September 17-21, 2007, Proceedings},
  editor    = {Kok, Joost N. and Koronacki, Jacek and de M{\'a}ntaras, Ramon L{\'o}pez and Matwin, Stan and Mladenic, Dunja and Skowron, Andrzej},
  series    = {Lecture Notes in Computer Science},
  volume    = 4702,
  pages     = {506--514},
  publisher = {Springer},
  year      = 2007,
  isbn      = {978-3-540-74975-2},
  doi       = {10.1007/978-3-540-74976-9_52},
  ee        = {http://dx.doi.org/10.1007/978-3-540-74976-9_52},
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/2007/Tag_Recommender_in_Folksonomies_final.pdf},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  vgwort    = {20},
  interhash = {7e212e3bac146d406035adebff248371},
  intrahash = {b8b87c78e9e27a44aacde0402c642bff}
}

@inproceedings{hotho2006information,
  author    = {Hotho, Andreas and J{\"a}schke, Robert and Schmitz, Christoph and Stumme, Gerd},
  title     = {Information Retrieval in Folksonomies: Search and Ranking},
  booktitle = {Proceedings of the 3rd European Semantic Web Conference},
  series    = {Lecture Notes in Computer Science},
  volume    = 4011,
  pages     = {411--426},
  publisher = {Springer},
  address   = {Budva, Montenegro},
  month     = jun,
  year      = 2006,
  isbn      = {3-540-34544-2},
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/2006/seach2006hotho_eswc.pdf},
  vgwort    = {29},
  interhash = {10ec64d80b0ac085328a953bb494fb89},
  intrahash = {7da1127fc4836e2cf58e3073f1b888b2}
}

@inproceedings{Pageetal98,
  author    = {Page, L. and Brin, S. and Motwani, R. and Winograd, T.},
  title     = {The {PageRank} Citation Ranking: Bringing Order to the {Web}},
  booktitle = {Proceedings of the 7th International World Wide Web Conference},
  pages     = {161--172},
  address   = {Brisbane, Australia},
  year      = 1998,
  url       = {http://citeseer.nj.nec.com/page98pagerank.html},
  interhash = {ca10cf0b0dd668c64b1f378ff0775849},
  intrahash = {ac49c33e114ca171db40cece6a0ae4d6}
}

@inproceedings{conf/www/RichardsonPB06,
  author    = {Richardson, Matthew and Prakash, Amit and Brill, Eric},
  title     = {Beyond {PageRank}: Machine Learning for Static Ranking},
  booktitle = {WWW},
  editor    = {Carr, Les and Roure, David De and Iyengar, Arun and Goble, Carole A. and Dahlin, Michael},
  pages     = {707--715},
  publisher = {ACM},
  year      = 2006,
  date      = {2006-07-17},
  isbn      = {1-59593-323-9},
  doi       = {10.1145/1135777.1135881},
  ee        = {http://doi.acm.org/10.1145/1135777.1135881},
  url       = {http://dblp.uni-trier.de/db/conf/www/www2006.html#RichardsonPB06},
  crossref  = {conf/www/2006},
  interhash = {250243dc43965a58f15a3561465c0cf5},
  intrahash = {4d2ddff0f0013f7d6cffc782c5eca56c}
}

@inproceedings{10.1109/WI.2006.149,
  author    = {Gori, Marco and Pucci, Augusto},
  title     = {Research Paper Recommender Systems: A Random-Walk Based Approach},
  booktitle = {Proceedings of the 2006 {IEEE/WIC/ACM} International Conference on Web Intelligence ({WI} 2006)},
  pages     = {778--781},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  year      = 2006,
  isbn      = {0-7695-2747-7},
  doi       = {10.1109/WI.2006.149},
  interhash = {8e7d5fef1c0d9154bcbe05dc2493e228},
  intrahash = {c9f50408ea15f2b34174e3cc3b5bf940}
}

@techreport{delong06concept,
  author      = {DeLong, Colin and Mane, Sandeep and Srivastava, Jaideep},
  title       = {Concept-Aware Ranking: Teaching an Old Graph New Moves},
  institution = {University of Minnesota},
  number      = {06-007},
  year        = 2006,
  url         = {http://www.cs.umn.edu/research/technical_reports.php?page=report&report_id=06-007},
  interhash   = {9d474013aee7d0d1fdb4c4c55dcdd4ac},
  intrahash   = {3d0477dac3614b173f144a389b01158a}
}

@article{eigenvector2005Langville,
  author    = {Langville, Amy N. and Meyer, Carl D.},
  title     = {A Survey of Eigenvector Methods of {Web} Information Retrieval},
  journal   = {{SIAM} Review},
  volume    = 47,
  number    = 1,
  pages     = {135--161},
  year      = 2005,
  url       = {http://www.cofc.edu/~langvillea/surveyEVwebIRReprint.pdf},
  abstract  = {Web information retrieval is significantly more challenging than traditional well-controlled, small document collection information retrieval. One main difference between traditional information retrieval and Web information retrieval is the Web's hyperlink structure. This structure has been exploited by several of today's leading Web search engines, particularly Google and Teoma. In this survey paper, we focus on Web information retrieval methods that use eigenvector computations, presenting the three popular methods of HITS, PageRank, and SALSA.},
  interhash = {d457071e1f5270c3d50cbb3243546833},
  intrahash = {445172dea700200486177842e9dfe3cb}
}

@inproceedings{haveliwala02topicsensitive,
  author    = {Haveliwala, Taher H.},
  title     = {Topic-Sensitive {PageRank}},
  booktitle = {Proceedings of the Eleventh International World Wide Web Conference},
  address   = {Honolulu, Hawaii},
  month     = may,
  year      = 2002,
  url       = {http://citeseer.csail.mit.edu/haveliwala02topicsensitive.html},
  interhash = {29a20afd5026732686509987f603d33d},
  intrahash = {c056611effc0d18aae71a6d535ff6c5a}
}

@article{Brinkmeier2006,
  author    = {Brinkmeier, M.},
  title     = {{PageRank} Revisited},
  journal   = {ACM Transactions on Internet Technology},
  volume    = 6,
  number    = 3,
  month     = aug,
  year      = 2006,
  url       = {http://eiche.theoinf.tu-ilmenau.de/~mbrinkme/documents/pagerank.pdf},
  interhash = {c5ad30b2379993a46d082508035c15c6},
  intrahash = {1456ecdf8c4bcf7cbf8eb512d0069252}
}

@techreport{284,
  author      = {Gyongyi, Z. and Berkhin, P. and Garcia-Molina, H. and Pedersen, J.},
  title       = {Link Spam Detection Based on Mass Estimation},
  institution = {Stanford Univ.},
  year        = 2005,
  url         = {http://infolab.stanford.edu/~zoltan/publications/gyongyi2006link.pdf},
  interhash   = {987e3d3fd3c529a2662a5387bc568793},
  intrahash   = {4dda644faa9132ef2f09ac8a13f11d75}
}

@inproceedings{1148225,
  author    = {Baeza-Yates, Ricardo and Boldi, Paolo and Castillo, Carlos},
  title     = {Generalizing {PageRank}: Damping Functions for Link-Based Ranking Algorithms},
  booktitle = {SIGIR '06: Proceedings of the 29th annual international ACM SIGIR conference on Research and development in information retrieval},
  pages     = {308--315},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  location  = {Seattle, Washington, USA},
  year      = 2006,
  isbn      = {1-59593-369-7},
  doi       = {10.1145/1148170.1148225},
  interhash = {1096ec9acdf545a40d1a8f4ef0bc9923},
  intrahash = {c6247928eadbb74c6e7a7a2f754e2963}
}

@article{collins75,
  author    = {Collins, Allan M. and Loftus, Elizabeth F.},
  title     = {A Spreading Activation Theory of Semantic Memory},
  journal   = {Psychological Review},
  volume    = 82,
  pages     = {407--428},
  year      = 1975,
  interhash = {c7a1c732ee055292c4f26a99524f1451},
  intrahash = {69ef818e9be84f1edda446440c6ded08}
}

@techreport{haveliwala03second,
  author      = {Haveliwala, Taher H. and Kamvar, Sepandar D.},
  title       = {The Second Eigenvalue of the {Google} Matrix},
  institution = {Stanford University},
  year        = 2003,
  url         = {http://citeseer.ist.psu.edu/haveliwala03second.html},
  text        = {T. H. Haveliwala and S. D. Kamvar. The second eigenvalue of the Google matrix. Stanford University Technical Report, 2003.},
  interhash   = {0d40b575bd05c58b401f7a78ad3d4627},
  intrahash   = {060fb6a057336a06d65a55344735e3f6}
}