% Bibliography entries on click-through data as implicit relevance feedback in web search.
% Layout: one field per line; bibliographic fields first, then identifiers, then the abstract.
% NOTE(review): interhash/intrahash are not standard BibTeX fields (presumably identifiers
% added by the exporting reference manager); standard styles ignore unknown fields.
% Text outside entries, like these lines, is ignored by BibTeX.

% Dou, Song, Yuan, Wen -- CIKM '08 conference paper.
@inproceedings{dou2008clickthrough,
  author    = {Dou, Zhicheng and Song, Ruihua and Yuan, Xiaojie and Wen, Ji-Rong},
  title     = {Are click-through data adequate for learning web search rankings?},
  booktitle = {CIKM '08: Proceedings of the 17th ACM conference on Information and knowledge management},
  pages     = {73--82},
  year      = {2008},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Napa Valley, California, USA},
  doi       = {10.1145/1458082.1458095},
  isbn      = {978-1-59593-991-3},
  url       = {http://portal.acm.org/citation.cfm?id=1458095},
  interhash = {1265bf68c0ea0ff5c2b4ee6c4a309378},
  intrahash = {5febcfb066de4ae1159bd8245a70fbc8},
  abstract  = {Learning-to-rank algorithms, which can automatically adapt ranking functions in web search, require a large volume of training data. A traditional way of generating training examples is to employ human experts to judge the relevance of documents. Unfortunately, it is difficult, time-consuming and costly. In this paper, we study the problem of exploiting click-through data for learning web search rankings that can be collected at much lower cost. We extract pairwise relevance preferences from a large-scale aggregated click-through dataset, compare these preferences with explicit human judgments, and use them as training examples to learn ranking functions. We find click-through data are useful and effective in learning ranking functions. A straightforward use of aggregated click-through data can outperform human judgments. We demonstrate that the strategies are only slightly affected by fraudulent clicks. We also reveal that the pairs which are very reliable, e.g., the pairs consisting of documents with large click frequency differences, are not sufficient for learning.},
}

% Joachims et al. -- journal article; "pages" holds the single value 7 as exported.
% "Web" is braced in the title so sentence-casing styles keep its capital.
@article{joachims2007evaluating,
  author    = {Joachims, Thorsten and Granka, Laura and Pan, Bing and Hembrooke, Helene and Radlinski, Filip and Gay, Geri},
  title     = {Evaluating the accuracy of implicit feedback from clicks and query reformulations in {Web} search},
  journal   = {ACM Transactions on Information Systems},
  volume    = {25},
  number    = {2},
  pages     = {7},
  year      = {2007},
  publisher = {ACM},
  address   = {New York, NY, USA},
  doi       = {10.1145/1229179.1229181},
  issn      = {1046-8188},
  url       = {http://portal.acm.org/citation.cfm?id=1229181},
  interhash = {bce701d0f64f0a9e87cc728550b90592},
  intrahash = {fbbc1bfbaf782b745d550496e1715a66},
  abstract  = {This article examines the reliability of implicit feedback generated from clickthrough data and query reformulations in World Wide Web (WWW) search. Analyzing the users' decision process using eyetracking and comparing implicit feedback against manual relevance judgments, we conclude that clicks are informative but biased. While this makes the interpretation of clicks as absolute relevance judgments difficult, we show that relative preferences derived from clicks are reasonably accurate on average. We find that such relative preferences are accurate not only between results from an individual query, but across multiple sets of results within chains of query reformulations.},
}

% Joachims et al. -- SIGIR '05 conference paper.
@inproceedings{joachims2005accurately,
  author    = {Joachims, Thorsten and Granka, Laura and Pan, Bing and Hembrooke, Helene and Gay, Geri},
  title     = {Accurately interpreting clickthrough data as implicit feedback},
  booktitle = {SIGIR '05: Proceedings of the 28th annual international ACM SIGIR conference on Research and development in information retrieval},
  pages     = {154--161},
  year      = {2005},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Salvador, Brazil},
  doi       = {10.1145/1076034.1076063},
  isbn      = {1-59593-034-5},
  url       = {http://portal.acm.org/citation.cfm?id=1076063},
  interhash = {050982b76855a6b1258ed0b40cb69018},
  intrahash = {38c84b26177b6c8b22c6016bc22ab18c},
  abstract  = {This paper examines the reliability of implicit feedback generated from clickthrough data in WWW search. Analyzing the users' decision process using eyetracking and comparing implicit feedback against manual relevance judgments, we conclude that clicks are informative but biased. While this makes the interpretation of clicks as absolute relevance judgments difficult, we show that relative preferences derived from clicks are reasonably accurate on average.},
}

% Zhao et al. -- WWW '06 conference paper.
@inproceedings{zhao2006timedependent,
  author    = {Zhao, Qiankun and Hoi, Steven C. H. and Liu, Tie-Yan and Bhowmick, Sourav S. and Lyu, Michael R. and Ma, Wei-Ying},
  title     = {Time-dependent semantic similarity measure of queries using historical click-through data},
  booktitle = {WWW '06: Proceedings of the 15th international conference on World Wide Web},
  pages     = {543--552},
  year      = {2006},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Edinburgh, Scotland},
  doi       = {10.1145/1135777.1135858},
  isbn      = {1-59593-323-9},
  url       = {http://portal.acm.org/citation.cfm?id=1135777.1135858},
  interhash = {c765e101c37f6b530e2c1c59808048d7},
  intrahash = {57cbc64550d3a1b5b8599a0783e95111},
  abstract  = {It has become a promising direction to measure similarity of Web search queries by mining the increasing amount of click-through data logged by Web search engines, which record the interactions between users and the search engines. Most existing approaches employ the click-through data for similarity measure of queries with little consideration of the temporal factor, while the click-through data is often dynamic and contains rich temporal information. In this paper we present a new framework of time-dependent query semantic similarity model on exploiting the temporal characteristics of historical click-through data. The intuition is that more accurate semantic similarity values between queries can be obtained by taking into account the timestamps of the log data. With a set of user-defined calendar schema and calendar patterns, our time-dependent query similarity model is constructed using the marginalized kernel technique, which can exploit both explicit similarity and implicit semantics from the click-through data effectively. Experimental results on a large set of click-through data acquired from a commercial search engine show that our time-dependent query similarity model is more accurate than the existing approaches. Moreover, we observe that our time-dependent query similarity model can, to some extent, reflect real-world semantics such as real-world events that are happening over time.},
}

% Xue et al. -- CIKM '04 conference paper.
% The numeric citation key is kept unchanged because existing documents may cite it.
@inproceedings{1031192,
  author    = {Xue, Gui-Rong and Zeng, Hua-Jun and Chen, Zheng and Yu, Yong and Ma, Wei-Ying and Xi, WenSi and Fan, WeiGuo},
  title     = {Optimizing web search using web click-through data},
  booktitle = {CIKM '04: Proceedings of the thirteenth ACM international conference on Information and knowledge management},
  pages     = {118--126},
  year      = {2004},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Washington, D.C., USA},
  doi       = {10.1145/1031171.1031192},
  isbn      = {1-58113-874-1},
  url       = {http://portal.acm.org/citation.cfm?id=1031171.1031192},
  interhash = {31bb65c1b57888b0529c1a11e981bbe8},
  intrahash = {2c9841b484ade7e9a8c9220662190c16},
  abstract  = {The performance of web search engines may often deteriorate due to the diversity and noisy information contained within web pages. User click-through data can be used to introduce more accurate description (metadata) for web pages, and to improve the search performance. However, noise and incompleteness, sparseness, and the volatility of web pages and queries are three major challenges for research work on user click-through log mining. In this paper, we propose a novel iterative reinforced algorithm to utilize the user click-through data to improve search performance. The algorithm fully explores the interrelations between queries and web pages, and effectively finds ``virtual queries'' for web pages and overcomes the challenges discussed above. Experiment results on a large set of MSN click-through log data show a significant improvement on search performance over the naive query log mining algorithm as well as the baseline search engine.},
}

% Ji et al. -- SIGIR '09 conference paper.
% The numeric citation key is kept unchanged because existing documents may cite it.
@inproceedings{1571950,
  author    = {Ji, Shihao and Zhou, Ke and Liao, Ciya and Zheng, Zhaohui and Xue, Gui-Rong and Chapelle, Olivier and Sun, Gordon and Zha, Hongyuan},
  title     = {Global ranking by exploiting user clicks},
  booktitle = {SIGIR '09: Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval},
  pages     = {35--42},
  year      = {2009},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Boston, MA, USA},
  doi       = {10.1145/1571941.1571950},
  isbn      = {978-1-60558-483-6},
  url       = {http://portal.acm.org/citation.cfm?id=1571950},
  interhash = {d5645d9369e43bbb712dc1eb9b4bd121},
  intrahash = {ca66cc173e65ef7fe5b0cd9bfb8646aa},
  abstract  = {It is now widely recognized that user interactions with search results can provide substantial relevance information on the documents displayed in the search results. In this paper, we focus on extracting relevance information from one source of user interactions, i.e., user click data, which records the sequence of documents being clicked and not clicked in the result set during a user search session. We formulate the problem as a global ranking problem, emphasizing the importance of the sequential nature of user clicks, with the goal to predict the relevance labels of all the documents in a search session. This is distinct from conventional learning to rank methods that usually design a ranking model defined on a single document; in contrast, in our model the relational information among the documents as manifested by an aggregation of user clicks is exploited to rank all the documents jointly. In particular, we adapt several sequential supervised learning algorithms, including the conditional random field (CRF), the sliding window method and the recurrent sliding window method, to the global ranking problem. Experiments on the click data collected from a commercial search engine demonstrate that our methods can outperform the baseline models for search results re-ranking.},
}