% Bibliography database: five conference papers (inproceedings entries).
%
% Conventions used below:
%   * One field per line, values brace-delimited, fields kept in the
%     alphabetical order of the original export.
%   * Citation keys are unchanged so that existing citations keep resolving.
%   * acmid, ee, interhash, intrahash, numpages and timestamp are not
%     standard BibTeX fields; standard styles ignore unknown fields, so they
%     are retained purely as metadata.
%   * Acronyms and proper names that must keep their capitalisation are
%     wrapped in braces so that sentence-casing styles cannot lowercase them.

% Dave, Lawrence, Pennock -- WWW '03 paper on opinion extraction.
@inproceedings{Dave:2003:MPG:775152.775226,
  acmid     = {775226},
  address   = {New York, NY, USA},
  author    = {Dave, Kushal and Lawrence, Steve and Pennock, David M.},
  booktitle = {Proceedings of the 12th International Conference on World Wide Web},
  doi       = {10.1145/775152.775226},
  interhash = {2005853697772642c99672a13b0f27e9},
  intrahash = {24644faf330140eaf12b43a0cded9e59},
  isbn      = {1-58113-680-3},
  location  = {Budapest, Hungary},
  numpages  = {10},
  pages     = {519--528},
  publisher = {ACM},
  series    = {WWW '03},
  title     = {Mining the Peanut Gallery: Opinion Extraction and Semantic Classification of Product Reviews},
  year      = 2003
}

% Schein, Popescul, Ungar, Pennock -- SIGIR '02 paper on cold-start
% recommendation. The accented letter in the abstract is written as a LaTeX
% escape so the file stays pure ASCII for classic 8-bit BibTeX.
@inproceedings{schein2002methods,
  abstract  = {We have developed a method for recommending items that combines content and collaborative data under a single probabilistic framework. We benchmark our algorithm against a na{\"\i}ve Bayes classifier on the cold-start problem, where we wish to recommend items that no one in the community has yet rated. We systematically explore three testing methodologies using a publicly available data set, and explain how these methods apply to specific real-world applications. We advocate heuristic recommenders when benchmarking to give competent baseline performance. We introduce a new performance metric, the CROC curve, and demonstrate empirically that the various components of our testing strategy combine to obtain deeper understanding of the performance characteristics of recommender systems. Though the emphasis of our testing is on cold-start recommending, our methods for recommending and evaluation are general.},
  acmid     = {564421},
  address   = {New York, NY, USA},
  author    = {Schein, Andrew I. and Popescul, Alexandrin and Ungar, Lyle H. and Pennock, David M.},
  booktitle = {Proceedings of the 25th Annual International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval},
  doi       = {10.1145/564376.564421},
  interhash = {49cbb254d33a01f79dac175c41ec70d8},
  intrahash = {eab2ae9f99bd5aed7ee66cd57b1cbc47},
  isbn      = {1-58113-561-0},
  location  = {Tampere, Finland},
  numpages  = {8},
  pages     = {253--260},
  publisher = {ACM},
  series    = {SIGIR '02},
  title     = {Methods and Metrics for Cold-start Recommendations},
  url       = {http://doi.acm.org/10.1145/564376.564421},
  year      = 2002
}

% Popescul, Pennock, Lawrence -- UAI 2001 paper on hybrid recommendation.
% Three character-recognition errors in the abstract have been repaired
% ("Hofmann's", "sparse-data", "ResearchIndex").
@inproceedings{popescul2001probabilistic,
  abstract  = {Recommender systems leverage product and community information to target products to consumers. Researchers have developed collaborative recommenders, content-based recommenders, and a few hybrid systems. We propose a unified probabilistic framework for merging collaborative and content-based recommendations. We extend Hofmann's (1999) aspect model to incorporate three-way co-occurrence data among users, items, and item content. The relative influence of collaboration data versus content data is not imposed as an exogenous parameter, but rather emerges naturally from the given data sources. However, global probabilistic models coupled with standard EM learning algorithms tend to drastically overfit in the sparse-data situations typical of recommendation applications. We show that secondary content information can often be used to overcome sparsity. Experiments on data from the ResearchIndex library of Computer Science publications show that appropriate mixture models incorporating secondary data produce significantly better quality recommenders than k-nearest neighbors (k-NN). Global probabilistic models also allow more general inferences than local methods like k-NN.},
  acmid     = {2074076},
  address   = {San Francisco, CA, USA},
  author    = {Popescul, Alexandrin and Pennock, David M. and Lawrence, Steve},
  booktitle = {Proceedings of the Seventeenth conference on Uncertainty in artificial intelligence},
  interhash = {12df713559b21a24a649787ad76d9b74},
  intrahash = {407a9c070710c3c3b7fe307d384aed37},
  isbn      = {1-55860-800-1},
  location  = {Seattle, Washington},
  numpages  = {8},
  pages     = {437--444},
  publisher = {Morgan Kaufmann Publishers Inc.},
  title     = {Probabilistic models for unified collaborative and content-based recommendation in sparse-data environments},
  url       = {http://dl.acm.org/citation.cfm?id=2074022.2074076},
  year      = 2001
}

% Cosley, Lawrence, Pennock -- VLDB '02 paper describing the REFEREE
% framework. The system names in the title are brace-protected because the
% title is otherwise entered in sentence case.
@inproceedings{cosley2002referee,
  abstract  = {Automated recommendation (e.g., personalized product recommendation on an ecommerce web site) is an increasingly valuable service associated with many databases--typically online retail catalogs and web logs. Currently, a major obstacle for evaluating recommendation algorithms is the lack of any standard, public, real-world testbed appropriate for the task. In an attempt to fill this gap, we have created REFEREE, a framework for building recommender systems using ResearchIndex--a huge online digital library of computer science research papers--so that anyone in the research community can develop, deploy, and evaluate recommender systems relatively easily and quickly. Research Index is in many ways ideal for evaluating recommender systems, especially so-called hybrid recommenders that combine information filtering and collaborative filtering techniques. The documents in the database are associated with a wealth of content information (author, title, abstract, full text) and collaborative information (user behaviors), as well as linkage information via the citation structure. Our framework supports more realistic evaluation metrics that assess user buy-in directly, rather than resorting to offline metrics like prediction accuracy that may have little to do with end user utility. The sheer scale of ResearchIndex (over 500,000 documents with thousands of user accesses per hour) will force algorithm designers to make real-world trade-offs that consider performance, not just accuracy. We present our own tradeoff decisions in building an example hybrid recommender called PD-Live. The algorithm uses content-based similarity information to select a set of documents from which to recommend, and collaborative information to rank the documents. PD-Live performs reasonably well compared to other recommenders in ResearchIndex.},
  author    = {Cosley, Dan and Lawrence, Steve and Pennock, David M.},
  booktitle = {{VLDB} '02: Proceedings of the 28th international conference on Very Large Data Bases},
  interhash = {cd87b54cacc63e242421dc2ecde84926},
  intrahash = {f5008da10f55ecc550525a3d49c45944},
  location  = {Hong Kong, China},
  pages     = {35--46},
  publisher = {VLDB Endowment},
  title     = {{REFEREE}: an open framework for practical testing of recommender systems using {ResearchIndex}},
  url       = {http://portal.acm.org/citation.cfm?id=1287369.1287374},
  year      = 2002
}

% Popescul, Ungar, Lawrence, Pennock -- ICDM 2003 paper.
%   * crossref points at a proceedings entry (conf/icdm/2003) that is not
%     defined in this part of the file; it must be supplied elsewhere, and
%     under classic BibTeX it has to appear after this entry.
%   * The original export carried the value 2004-01-28 in a "date" field.
%     That conflicts with year 2003 (biblatex would let date win and print
%     2004). It is presumably the export record's timestamp rather than a
%     publication date -- TODO confirm -- so it is kept under the
%     style-ignored name "timestamp".
@inproceedings{conf/icdm/PopesculULP03,
  author    = {Popescul, Alexandrin and Ungar, Lyle H. and Lawrence, Steve and Pennock, David M.},
  booktitle = {{ICDM}},
  crossref  = {conf/icdm/2003},
  ee        = {http://csdl.computer.org/comp/proceedings/icdm/2003/1978/00/19780275abs.htm},
  interhash = {3bcb76c6628b1752db555f86fe39429e},
  intrahash = {7cdd6b0791fcdf17ec6d404b55f12c5c},
  isbn      = {0-7695-1978-4},
  pages     = {275--282},
  publisher = {IEEE Computer Society},
  timestamp = {2004-01-28},
  title     = {Statistical Relational Learning for Document Mining},
  url       = {http://www.cis.upenn.edu/~popescul/Publications/popescul03dm.pdf},
  year      = 2003
}