% ACM conference papers (SIGIR and WWW proceedings).
% Conventions used below: one field per line; doi holds the bare DOI with no
% resolver prefix; title words whose capitalisation must survive a style's
% sentence-casing are wrapped in braces; acmid, interhash, intrahash and
% numpages are export metadata that standard styles ignore.

% BrowseRank paper, SIGIR 2008.
@inproceedings{liu2008browserank,
  author    = {Liu, Yuting and Gao, Bin and Liu, Tie-Yan and Zhang, Ying and Ma, Zhiming and He, Shuyuan and Li, Hang},
  title     = {{BrowseRank}: letting web users vote for page importance},
  booktitle = {Proceedings of the 31st annual international ACM SIGIR conference on Research and development in information retrieval},
  pages     = {451--458},
  numpages  = {8},
  year      = 2008,
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Singapore, Singapore},
  isbn      = {978-1-60558-164-4},
  doi       = {10.1145/1390334.1390412},
  url       = {http://doi.acm.org/10.1145/1390334.1390412},
  acmid     = {1390412},
  interhash = {cc998339ed040e7a8e4898a9072f3351},
  intrahash = {2eeaa8699f0c0e86c47fbbbf445f6022},
  abstract  = {This paper proposes a new method for computing page importance, referred to as BrowseRank. The conventional approach to compute page importance is to exploit the link graph of the web and to build a model based on that graph. For instance, PageRank is such an algorithm, which employs a discrete-time Markov process as the model. Unfortunately, the link graph might be incomplete and inaccurate with respect to data for determining page importance, because links can be easily added and deleted by web content creators. In this paper, we propose computing page importance by using a 'user browsing graph' created from user behavior data. In this graph, vertices represent pages and directed edges represent transitions between pages in the users' web browsing history. Furthermore, the lengths of staying time spent on the pages by users are also included. The user browsing graph is more reliable than the link graph for inferring page importance. This paper further proposes using the continuous-time Markov process on the user browsing graph as a model and computing the stationary probability distribution of the process as page importance. An efficient algorithm for this computation has also been devised. In this way, we can leverage hundreds of millions of users' implicit voting on page importance. Experimental results show that BrowseRank indeed outperforms the baseline methods such as PageRank and TrustRank in several tasks.},
}

% Large-scale web categorization study, WWW 2005 posters track.
% Surnames are stored in normal capitalisation; styles decide any small caps.
@inproceedings{liu2005experimental,
  author    = {Liu, Tie-Yan and Yang, Yiming and Wan, Hao and Zhou, Qian and Gao, Bin and Zeng, Hua-Jun and Chen, Zheng and Ma, Wei-Ying},
  title     = {An experimental study on large-scale web categorization},
  booktitle = {Special interest tracks and posters of the 14th international conference on World Wide Web},
  series    = {WWW '05},
  pages     = {1106--1107},
  numpages  = {2},
  year      = 2005,
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Chiba, Japan},
  isbn      = {1-59593-051-5},
  doi       = {10.1145/1062745.1062891},
  url       = {http://doi.acm.org/10.1145/1062745.1062891},
  acmid     = {1062891},
  interhash = {e581e4dd2ed6d748031a812c724c4b7c},
  intrahash = {36cc9f92b9c722b2aff441b23e44b2f7},
  abstract  = {Taxonomies of the Web typically have hundreds of thousands of categories and skewed category distribution over documents. It is not clear whether existing text classification technologies can perform well on and scale up to such large-scale applications. To understand this, we conducted the evaluation of several representative methods (Support Vector Machines, k-Nearest Neighbor and Naive Bayes) with Yahoo! taxonomies. In particular, we evaluated the effectiveness/efficiency tradeoff in classifiers with hierarchical setting compared to conventional (flat) setting, and tested popular threshold tuning strategies for their scalability and accuracy in large-scale classification problems.},
}

% Time-dependent query similarity paper, WWW 2006.
% This key was previously defined twice with identical fields; only one
% definition is kept so the key is unique.
@inproceedings{zhao2006timedependent,
  author    = {Zhao, Qiankun and Hoi, Steven C. H. and Liu, Tie-Yan and Bhowmick, Sourav S. and Lyu, Michael R. and Ma, Wei-Ying},
  title     = {Time-dependent semantic similarity measure of queries using historical click-through data},
  booktitle = {WWW '06: Proceedings of the 15th international conference on World Wide Web},
  pages     = {543--552},
  year      = 2006,
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Edinburgh, Scotland},
  isbn      = {1-59593-323-9},
  doi       = {10.1145/1135777.1135858},
  url       = {http://portal.acm.org/citation.cfm?id=1135777.1135858},
  interhash = {c765e101c37f6b530e2c1c59808048d7},
  intrahash = {57cbc64550d3a1b5b8599a0783e95111},
  abstract  = {It has become a promising direction to measure similarity of Web search queries by mining the increasing amount of click-through data logged by Web search engines, which record the interactions between users and the search engines. Most existing approaches employ the click-through data for similarity measure of queries with little consideration of the temporal factor, while the click-through data is often dynamic and contains rich temporal information. In this paper we present a new framework of time-dependent query semantic similarity model on exploiting the temporal characteristics of historical click-through data. The intuition is that more accurate semantic similarity values between queries can be obtained by taking into account the timestamps of the log data. With a set of user-defined calendar schema and calendar patterns, our time-dependent query similarity model is constructed using the marginalized kernel technique, which can exploit both explicit similarity and implicit semantics from the click-through data effectively. Experimental results on a large set of click-through data acquired from a commercial search engine show that our time-dependent query similarity model is more accurate than the existing approaches. Moreover, we observe that our time-dependent query similarity model can, to some extent, reflect real-world semantics such as real-world events that are happening over time.},
}

% Legacy numeric key for the same paper as zhao2006timedependent (same title,
% authors, pages and DOI). Retained in case existing documents cite this key;
% NOTE(review): confirm whether it is still cited before removing it.
@inproceedings{1135858,
  author    = {Zhao, Qiankun and Hoi, Steven C. H. and Liu, Tie-Yan and Bhowmick, Sourav S. and Lyu, Michael R. and Ma, Wei-Ying},
  title     = {Time-dependent semantic similarity measure of queries using historical click-through data},
  booktitle = {WWW '06: Proceedings of the 15th international conference on World Wide Web},
  pages     = {543--552},
  year      = 2006,
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Edinburgh, Scotland},
  isbn      = {1-59593-323-9},
  doi       = {10.1145/1135777.1135858},
  url       = {http://portal.acm.org/citation.cfm?id=1135858},
  interhash = {c765e101c37f6b530e2c1c59808048d7},
  intrahash = {57cbc64550d3a1b5b8599a0783e95111},
  abstract  = {It has become a promising direction to measure similarity of Web search queries by mining the increasing amount of click-through data logged by Web search engines, which record the interactions between users and the search engines. Most existing approaches employ the click-through data for similarity measure of queries with little consideration of the temporal factor, while the click-through data is often dynamic and contains rich temporal information. In this paper we present a new framework of time-dependent query semantic similarity model on exploiting the temporal characteristics of historical click-through data. The intuition is that more accurate semantic similarity values between queries can be obtained by taking into account the timestamps of the log data. With a set of user-defined calendar schema and calendar patterns, our time-dependent query similarity model is constructed using the marginalized kernel technique, which can exploit both explicit similarity and implicit semantics from the click-through data effectively. Experimental results on a large set of click-through data acquired from a commercial search engine show that our time-dependent query similarity model is more accurate than the existing approaches. Moreover, we observe that our time-dependent query similarity model can, to some extent, reflect real-world semantics such as real-world events that are happening over time.},
}

% AggregateRank paper, SIGIR 2006.
@inproceedings{1148187,
  author    = {Feng, Guang and Liu, Tie-Yan and Wang, Ying and Bao, Ying and Ma, Zhiming and Zhang, Xu-Dong and Ma, Wei-Ying},
  title     = {{AggregateRank}: bringing order to web sites},
  booktitle = {SIGIR '06: Proceedings of the 29th annual international ACM SIGIR conference on Research and development in information retrieval},
  pages     = {75--82},
  year      = 2006,
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Seattle, Washington, USA},
  isbn      = {1-59593-369-7},
  doi       = {10.1145/1148170.1148187},
  interhash = {21d1ae55f77976e7d7c97f2d579a0d15},
  intrahash = {683a3288d7e356c98ab0d67e9f42d426},
}