% Bibliography entries on reputation models, Web-page classification,
% web clustering and click-through mining (one journal article and five
% conference papers).
%
% Conventions used below:
%   - one field per line, bibliographic fields first, identifiers next,
%     abstract last;
%   - LaTeX specials inside field values are escaped (ampersand, percent
%     sign) so the fields are safe to typeset;
%   - proper nouns and acronyms that must keep their capitals are wrapped
%     in braces;
%   - citation keys are kept exactly as they were (including the purely
%     numeric key of the last entry) so existing citations keep resolving.

% NOTE(review): the third author below is stored with family name Wenyin
% and given name Liu; this may be inverted -- confirm against the
% published byline before changing it.
@article{chen2007reputation,
  author    = {Chen, Wei and Zeng, Qingtian and Wenyin, Liu and Hao, Tianyong},
  title     = {A user reputation model for a user-interactive question answering system},
  journal   = {Concurrency and Computation: Practice and Experience},
  volume    = {19},
  number    = {15},
  pages     = {2091--2103},
  year      = {2007},
  publisher = {John Wiley \& Sons, Ltd.},
  issn      = {1532-0634},
  doi       = {10.1002/cpe.1142},
  url       = {http://dx.doi.org/10.1002/cpe.1142},
  interhash = {c304f655ee6ee183e07192b9fed0d618},
  intrahash = {858df3646b706ce6308a12cbf1585d58},
  abstract  = {In this paper, we propose a user reputation model and apply it to a user-interactive question answering system. It combines the social network analysis approach and the user rating approach. Social network analysis is applied to analyze the impact of participant users' relations to their reputations. User rating is used to acquire direct judgment of a user's reputation based on other users' experiences with this user. Preliminary experiments show that the computed reputations based on our proposed reputation model can reflect the actual reputations of the simulated roles and therefore can fit in well with our user-interactive question answering system. Copyright {\copyright} 2006 John Wiley \& Sons, Ltd.},
}

@inproceedings{Shen:2004:WCT:1008992.1009035,
  author    = {Shen, Dou and Chen, Zheng and Yang, Qiang and Zeng, Hua-Jun and Zhang, Benyu and Lu, Yuchang and Ma, Wei-Ying},
  title     = {Web-page classification through summarization},
  booktitle = {Proceedings of the 27th annual international {ACM} {SIGIR} conference on Research and development in information retrieval},
  series    = {SIGIR '04},
  pages     = {242--249},
  numpages  = {8},
  year      = {2004},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Sheffield, United Kingdom},
  isbn      = {1-58113-881-4},
  doi       = {10.1145/1008992.1009035},
  url       = {http://doi.acm.org/10.1145/1008992.1009035},
  acmid     = {1009035},
  interhash = {328ff5b51cb573cd1d253f339892c029},
  intrahash = {b83fca9d43e5afdea78b9791cc07890c},
  abstract  = {Web-page classification is much more difficult than pure-text classification due to a large variety of noisy information embedded in Web pages. In this paper, we propose a new Web-page classification algorithm based on Web summarization for improving the accuracy. We first give empirical evidence that ideal Web-page summaries generated by human editors can indeed improve the performance of Web-page classification algorithms. We then propose a new Web summarization-based classification algorithm and evaluate it along with several other state-of-the-art text summarization algorithms on the LookSmart Web directory. Experimental results show that our proposed summarization-based classification algorithm achieves an approximately 8.8\% improvement as compared to pure-text-based classification algorithm. We further introduce an ensemble classifier using the improved summarization algorithm and show that it achieves about 12.9\% improvement over pure-text based methods.},
}

@inproceedings{liu2005experimental,
  author    = {Liu, Tie-Yan and Yang, Yiming and Wan, Hao and Zhou, Qian and Gao, Bin and Zeng, Hua-Jun and Chen, Zheng and Ma, Wei-Ying},
  title     = {An experimental study on large-scale web categorization},
  booktitle = {Special interest tracks and posters of the 14th international conference on {World Wide Web}},
  series    = {WWW '05},
  pages     = {1106--1107},
  numpages  = {2},
  year      = {2005},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Chiba, Japan},
  isbn      = {1-59593-051-5},
  doi       = {10.1145/1062745.1062891},
  url       = {http://doi.acm.org/10.1145/1062745.1062891},
  acmid     = {1062891},
  interhash = {e581e4dd2ed6d748031a812c724c4b7c},
  intrahash = {36cc9f92b9c722b2aff441b23e44b2f7},
  abstract  = {Taxonomies of the Web typically have hundreds of thousands of categories and skewed category distribution over documents. It is not clear whether existing text classification technologies can perform well on and scale up to such large-scale applications. To understand this, we conducted the evaluation of several representative methods (Support Vector Machines, k-Nearest Neighbor and Naive Bayes) with Yahoo! taxonomies. In particular, we evaluated the effectiveness/efficiency tradeoff in classifiers with hierarchical setting compared to conventional (flat) setting, and tested popular threshold tuning strategies for their scalability and accuracy in large-scale classification problems.},
}

@inproceedings{Dumais:2000:HCW:345508.345593,
  author    = {Dumais, Susan and Chen, Hao},
  title     = {Hierarchical classification of {Web} content},
  booktitle = {Proceedings of the 23rd annual international {ACM} {SIGIR} conference on Research and development in information retrieval},
  series    = {SIGIR '00},
  pages     = {256--263},
  numpages  = {8},
  year      = {2000},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Athens, Greece},
  isbn      = {1-58113-226-3},
  doi       = {10.1145/345508.345593},
  url       = {http://doi.acm.org/10.1145/345508.345593},
  acmid     = {345593},
  interhash = {1051e6db3c79db59699a253138bb3b64},
  intrahash = {be8e5ee591f98d95ff6ee2f2f227e3be},
}

@inproceedings{Lu:2009:ETN:1645953.1646167,
  author    = {Lu, Caimei and Chen, Xin and Park, E. K.},
  title     = {Exploit the tripartite network of social tagging for web clustering},
  booktitle = {Proceedings of the 18th {ACM} conference on Information and knowledge management},
  series    = {CIKM '09},
  pages     = {1545--1548},
  numpages  = {4},
  year      = {2009},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Hong Kong, China},
  isbn      = {978-1-60558-512-3},
  doi       = {10.1145/1645953.1646167},
  url       = {http://doi.acm.org/10.1145/1645953.1646167},
  acmid     = {1646167},
  interhash = {e192e53972f28d78f1ecbffbfea08bed},
  intrahash = {86160cf68758ec60922323a34a7833f0},
  abstract  = {In this poster, we investigate how to enhance web clustering by leveraging the tripartite network of social tagging systems. We propose a clustering method, called ``Tripartite Clustering'', which cluster the three types of nodes (resources, users and tags) simultaneously based on the links in the social tagging network. The proposed method is experimented on a real-world social tagging dataset sampled from del.icio.us. We also compare the proposed clustering approach with K-means. All the clustering results are evaluated against a human-maintained web directory. The experimental results show that Tripartite Clustering significantly outperforms the content-based K-means approach and achieves performance close to that of social annotation-based K-means whereas generating much more useful information.},
}

@inproceedings{1031192,
  author    = {Xue, Gui-Rong and Zeng, Hua-Jun and Chen, Zheng and Yu, Yong and Ma, Wei-Ying and Xi, WenSi and Fan, WeiGuo},
  title     = {Optimizing web search using web click-through data},
  booktitle = {{CIKM} '04: Proceedings of the thirteenth {ACM} international conference on Information and knowledge management},
  pages     = {118--126},
  year      = {2004},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Washington, D.C., USA},
  isbn      = {1-58113-874-1},
  doi       = {10.1145/1031171.1031192},
  url       = {http://portal.acm.org/citation.cfm?id=1031171.1031192},
  interhash = {31bb65c1b57888b0529c1a11e981bbe8},
  intrahash = {2c9841b484ade7e9a8c9220662190c16},
  abstract  = {The performance of web search engines may often deteriorate due to the diversity and noisy information contained within web pages. User click-through data can be used to introduce more accurate description (metadata) for web pages, and to improve the search performance. However, noise and incompleteness, sparseness, and the volatility of web pages and queries are three major challenges for research work on user click-through log mining. In this paper, we propose a novel iterative reinforced algorithm to utilize the user click-through data to improve search performance. The algorithm fully explores the interrelations between queries and web pages, and effectively finds ``virtual queries'' for web pages and overcomes the challenges discussed above. Experiment results on a large set of MSN click-through log data show a significant improvement on search performance over the naive query log mining algorithm as well as the baseline search engine.},
}