% Joachims, "Optimizing search engines using clickthrough data" (ACM SIGKDD, 2002).
% interhash/intrahash and acmid are non-standard fields; standard styles ignore them.
@inproceedings{joachims2002optimizing,
  abstract  = {This paper presents an approach to automatically optimizing the retrieval quality of search engines using clickthrough data. Intuitively, a good information retrieval system should present relevant documents high in the ranking, with less relevant documents following below. While previous approaches to learning retrieval functions from examples exist, they typically require training data generated from relevance judgments by experts. This makes them difficult and expensive to apply. The goal of this paper is to develop a method that utilizes clickthrough data for training, namely the query-log of the search engine in connection with the log of links the users clicked on in the presented ranking. Such clickthrough data is available in abundance and can be recorded at very low cost. Taking a Support Vector Machine (SVM) approach, this paper presents a method for learning retrieval functions. From a theoretical perspective, this method is shown to be well-founded in a risk minimization framework. Furthermore, it is shown to be feasible even for large sets of queries and features. The theoretical results are verified in a controlled experiment. It shows that the method can effectively adapt the retrieval function of a meta-search engine to a particular group of users, outperforming Google in terms of retrieval quality after only a couple of hundred training examples.},
  acmid     = {775067},
  address   = {New York, NY, USA},
  author    = {Joachims, Thorsten},
  booktitle = {Proceedings of the eighth {ACM} {SIGKDD} international conference on Knowledge discovery and data mining},
  doi       = {10.1145/775047.775067},
  interhash = {c78df69370bbf12636eaa5233b1fba83},
  intrahash = {656a83f1057c5792506d0d656ae81d26},
  isbn      = {1-58113-567-X},
  location  = {Edmonton, Alberta, Canada},
  numpages  = {10},
  pages     = {133--142},
  publisher = {ACM},
  title     = {Optimizing search engines using clickthrough data},
  url       = {http://doi.acm.org/10.1145/775047.775067},
  year      = 2002
}

% Navarro Bullock, Jaeschke and Hotho, "Tagging data as implicit feedback for
% learning-to-rank" (ACM WebSci Conference, 2011).
% The umlaut in the second author's surname is written as a braced LaTeX accent
% so that classic 8-bit BibTeX sorts and labels the name correctly.
% vgwort, interhash and intrahash are non-standard fields; standard styles ignore them.
@inproceedings{navarrobullock2011tagging,
  abstract     = {Learning-to-rank methods automatically generate ranking functions which can be used for ordering unknown resources according to their relevance for a specific search query. The training data to construct such a model consists of features describing a document-query-pair as well as relevance scores indicating how important the document is for the query. In general, these relevance scores are derived by asking experts to manually assess search results or by exploiting user search behaviour such as click data. The human evaluation of ranking results gives explicit relevance scores, but it is expensive to obtain. Clickdata can be logged from the user interaction with a search engine, but the feedback is noisy. In this paper, we want to explore a novel source of implicit feedback for web search: tagging data. Creating relevance feedback from tagging data leads to a further source of implicit relevance feedback which helps improve the reliability of automatically generated relevance scores and therefore the quality of learning-to-rank models.},
  address      = {New York, NY, USA},
  author       = {Navarro Bullock, Beate and J{\"a}schke, Robert and Hotho, Andreas},
  booktitle    = {Proceedings of the {ACM} {WebSci} Conference},
  interhash    = {7afaa67dfeb07f7e0b85abf2be61aff1},
  intrahash    = {e5a4b67ed6173e9645aab321019efd74},
  location     = {Koblenz, Germany},
  month        = jun,
  organization = {ACM},
  pages        = {1--4},
  title        = {Tagging data as implicit feedback for learning-to-rank},
  url          = {http://journal.webscience.org/463/},
  vgwort       = {14,8},
  year         = 2011
}