@inproceedings{jardine2014topical,
  address = {Gothenburg, Sweden},
  author = {Jardine, James and Teufel, Simone},
  booktitle = {Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics},
  month = {April},
  pages = {501--510},
  publisher = {Association for Computational Linguistics},
  title = {Topical PageRank: A Model of Scientific Expertise for Bibliographic Search},
  url = {http://www.aclweb.org/anthology/E14-1053},
  year = 2014
}

@inproceedings{yu2004temporal,
  abstract = {Web search is probably the single most important application on the Internet. The most famous search techniques are perhaps the PageRank and HITS algorithms. These algorithms are motivated by the observation that a hyperlink from a page to another is an implicit conveyance of authority to the target page. They exploit this social phenomenon to identify quality pages, e.g., "authority" pages and "hub" pages. In this paper we argue that these algorithms miss an important dimension of the Web, the temporal dimension. The Web is not a static environment. It changes constantly. Quality pages in the past may not be quality pages now or in the future. These techniques favor older pages because these pages have many in-links accumulated over time. New pages, which may be of high quality, have few or no in-links and are left behind. Bringing new and quality pages to users is important because most users want the latest information. Research publication search has exactly the same problem. This paper studies the temporal dimension of search in the context of research publication search. We propose a number of methods to deal with the problem. Our experimental results show that these methods are highly effective.},
  acmid = {1013519},
  address = {New York, NY, USA},
  author = {Yu, Philip S. and Li, Xin and Liu, Bing},
  booktitle = {Proceedings of the 13th international World Wide Web conference on Alternate track papers \& posters},
  doi = {10.1145/1013367.1013519},
  isbn = {1-58113-912-8},
  location = {New York, NY, USA},
  numpages = {2},
  pages = {448--449},
  publisher = {ACM},
  series = {WWW Alt. '04},
  title = {On the temporal dimension of search},
  url = {http://doi.acm.org/10.1145/1013367.1013519},
  year = 2004
}

@article{brin1998anatomy,
  abstract = {In this paper, we present Google, a prototype of a large-scale search engine which makes heavy use of the structure present in hypertext. Google is designed to crawl and index the Web efficiently and produce much more satisfying search results than existing systems. The prototype with a full text and hyperlink database of at least 24 million pages is available at http://infolab.stanford.edu/~backrub/google.html To engineer a search engine is a challenging task. Search engines index tens to hundreds of millions of web pages involving a comparable number of distinct terms. They answer tens of millions of queries every day. Despite the importance of large-scale search engines on the web, very little academic research has been done on them. Furthermore, due to rapid advance in technology and web proliferation, creating a web search engine today is very different from three years ago.
    This paper provides an in-depth description of our large-scale web search engine -- the first such detailed public description w...},
  author = {Brin, Sergey and Page, Lawrence},
  journal = {Computer Networks and ISDN Systems},
  number = {1--7},
  pages = {107--117},
  title = {The anatomy of a large-scale hypertextual Web search engine},
  url = {http://citeseer.ist.psu.edu/brin98anatomy.html},
  volume = 30,
  year = 1998
}

@incollection{koschtzki2005centrality,
  abstract = {Centrality indices are to quantify an intuitive feeling that in most networks some vertices or edges are more central than others. Many vertex centrality indices were introduced for the first time in the 1950s: e.g., the Bavelas index [50, 51], degree centrality [483] or a first feedback centrality, introduced by Seeley [510]. These early centralities raised a rush of research in which manifold applications were found. However, not every centrality index was suitable to every application, so with time, dozens of new centrality indices were published. This chapter will present some of the more influential, ‘classic’ centrality indices. We do not strive for completeness, but hope to give a catalog of basic centrality indices with some of their main applications.},
  address = {Berlin / Heidelberg},
  affiliation = {IPK Gatersleben, Corrensstraße 3, 06466 Gatersleben, Germany},
  author = {Koschützki, Dirk and Lehmann, Katharina and Peeters, Leon and Richter, Stefan and Tenfelde-Podehl, Dagmar and Zlotowski, Oliver},
  booktitle = {Network Analysis},
  doi = {10.1007/978-3-540-31955-9_3},
  editor = {Brandes, Ulrik and Erlebach, Thomas},
  isbn = {978-3-540-24979-5},
  keyword = {Computer Science},
  pages = {16--61},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  title = {Centrality Indices},
  url = {http://dx.doi.org/10.1007/978-3-540-31955-9_3},
  volume = 3418,
  year = 2005
}

@article{springerlink:10.1007/s00778-010-0204-8,
  abstract = {Graph conductance queries, also known as personalized PageRank and related to random walks with restarts, were originally proposed to assign a hyperlink-based prestige score to Web pages. More general forms of such queries are also very useful for ranking in entity-relation (ER) graphs used to represent relational, XML and hypertext data. Evaluation of PageRank usually involves a global eigen computation. If the graph is even moderately large, interactive response times may not be possible. Recently, the need for interactive PageRank evaluation has increased. The graph may be fully known only when the query is submitted. Browsing actions of the user may change some inputs to the PageRank computation dynamically. In this paper, we describe a system that analyzes query workloads and the ER graph, invests in limited offline indexing, and exploits those indices to achieve essentially constant-time query processing, even as the graph size scales. Our techniques—data and query statistics collection, index selection and materialization, and query-time index exploitation—have parallels in the extensive relational query optimization literature, but are applied to supporting novel graph data repositories.
    We report on experiments with five temporal snapshots of the CiteSeer ER graph having 74–702 thousand entity nodes, 0.17–1.16 million word nodes, 0.29–3.26 million edges between entities, and 3.29–32.8 million edges between words and entities. We also used two million actual queries from CiteSeer’s logs. Queries run 3–4 orders of magnitude faster than whole-graph PageRank, the gap growing with graph size. Index size is smaller than a text index. Ranking accuracy is 94–98\% with reference to whole-graph PageRank.},
  address = {Berlin / Heidelberg},
  affiliation = {IIT Bombay, Powai, Mumbai, Maharashtra, India},
  author = {Chakrabarti, Soumen and Pathak, Amit and Gupta, Manish},
  doi = {10.1007/s00778-010-0204-8},
  issn = {1066-8888},
  journal = {The VLDB Journal},
  keyword = {Computer Science},
  pages = {1--26},
  publisher = {Springer},
  title = {Index design and query processing for graph conductance search},
  url = {http://dx.doi.org/10.1007/s00778-010-0204-8},
  year = 2010
}

@misc{Maslov2009,
  abstract = {We review our recent work on applying the Google PageRank algorithm to find scientific "gems" among all Physical Review publications, and its extension to CiteRank, to find currently popular research directions. These metrics provide a meaningful extension to traditionally-used importance measures, such as the number of citations and journal impact factor. We also point out some pitfalls of over-relying on quantitative metrics to evaluate scientific quality.},
  author = {Maslov, Sergei and Redner, S.},
  note = {arXiv:0901.2640. 3 pages, 1 figure; invited comment for the Journal of Neuroscience. The arXiv version is microscopically different from the published version.},
  title = {Promise and Pitfalls of Extending Google's PageRank Algorithm to Citation Networks},
  url = {http://arxiv.org/abs/0901.2640},
  year = 2009
}