@article{adomavicius2012impact,
  abstract = {This article investigates the impact of rating data characteristics on the performance of several popular recommendation algorithms, including user-based and item-based collaborative filtering, as well as matrix factorization. We focus on three groups of data characteristics: rating space, rating frequency distribution, and rating value distribution. A sampling procedure was employed to obtain different rating data subsamples with varying characteristics; recommendation algorithms were used to estimate the predictive accuracy for each sample; and linear regression-based models were used to uncover the relationships between data characteristics and recommendation accuracy. Experimental results on multiple rating datasets show the consistent and significant effects of several data characteristics on recommendation accuracy.},
  acmid = {2151166},
  address = {New York, NY, USA},
  articleno = {3},
  author = {Adomavicius, Gediminas and Zhang, Jingjing},
  doi = {10.1145/2151163.2151166},
  interhash = {53e424cc9502ebb33d38de1d04230196},
  intrahash = {e41453a56391ca382f2298607b361208},
  issn = {2158-656X},
  issue_date = {April 2012},
  journal = {ACM Trans. Manage. Inf. Syst.},
  month = apr,
  number = 1,
  numpages = {17},
  pages = {3:1--3:17},
  publisher = {ACM},
  title = {Impact of Data Characteristics on Recommender Systems Performance},
  url = {http://doi.acm.org/10.1145/2151163.2151166},
  volume = 3,
  year = 2012
}

@inproceedings{cremonesi2010performance,
  abstract = {In many commercial systems, the 'best bet' recommendations are shown, but the predicted rating values are not. This is usually referred to as a top-N recommendation task, where the goal of the recommender system is to find a few specific items which are supposed to be most appealing to the user. Common methodologies based on error metrics (such as RMSE) are not a natural fit for evaluating the top-N recommendation task. Rather, top-N performance can be directly measured by alternative methodologies based on accuracy metrics (such as precision/recall). An extensive evaluation of several state-of-the-art recommender algorithms suggests that algorithms optimized for minimizing RMSE do not necessarily perform as expected on the top-N recommendation task. Results show that improvements in RMSE often do not translate into accuracy improvements. In particular, a naive non-personalized algorithm can outperform some common recommendation approaches and almost match the accuracy of sophisticated algorithms. Another finding is that the few most popular items can skew the top-N performance. The analysis points out that when evaluating a recommender algorithm on the top-N recommendation task, the test set should be chosen carefully in order not to bias accuracy metrics towards non-personalized solutions. Finally, we offer practitioners new variants of two collaborative filtering algorithms that, regardless of their RMSE, significantly outperform other recommender algorithms in pursuing the top-N recommendation task, while offering additional practical advantages. This comes as a surprise given the simplicity of these two methods.},
  acmid = {1864721},
  address = {New York, NY, USA},
  author = {Cremonesi, Paolo and Koren, Yehuda and Turrin, Roberto},
  booktitle = {Proceedings of the Fourth ACM Conference on Recommender Systems},
  doi = {10.1145/1864708.1864721},
  interhash = {04cb3373b65b03e03225f447250e7873},
  intrahash = {aeab7f02942cfeb97ccc7ae0a1d60801},
  isbn = {978-1-60558-906-0},
  location = {Barcelona, Spain},
  numpages = {8},
  pages = {39--46},
  publisher = {ACM},
  series = {RecSys '10},
  title = {Performance of Recommender Algorithms on Top-N Recommendation Tasks},
  url = {http://doi.acm.org/10.1145/1864708.1864721},
  year = 2010
}

@inproceedings{korner2010categorizers,
  abstract = {While recent research has advanced our understanding of the structure and dynamics of social tagging systems, we know little about (i) the underlying motivations for tagging (why users tag), and (ii) how they influence the properties of resulting tags and folksonomies. In this paper, we focus on problem (i) based on a distinction between two types of user motivations that we have identified in earlier work: Categorizers vs. Describers. To that end, we systematically define and evaluate a number of measures designed to discriminate between describers, i.e., users who use tags for describing resources, and categorizers, i.e., users who use tags for categorizing resources. Subsequently, we present empirical findings from qualitative and quantitative evaluations of the measures on real-world tagging behavior. In addition, we conducted a recommender evaluation in which we studied the effectiveness of each of the presented measures and found the measure based on tag content to be the most accurate in predicting user behavior, closely followed by a content-independent measure. The overall contribution of this paper is the presentation of empirical evidence that tagging motivation can be approximated with simple statistical measures. Our research is relevant for (a) designers of tagging systems aiming to better understand the motivations of their users and (b) researchers interested in studying the effects of users' tagging motivation on the properties of resulting tags and emergent structures in social tagging systems.},
  acmid = {1810645},
  address = {New York, NY, USA},
  author = {K\"{o}rner, Christian and Kern, Roman and Grahsl, Hans-Peter and Strohmaier, Markus},
  booktitle = {Proceedings of the 21st ACM Conference on Hypertext and Hypermedia},
  doi = {10.1145/1810617.1810645},
  interhash = {ccca64b638181c35972c71e586ddc0c2},
  intrahash = {87e3f9fa38eed6342454dcf47bb3e575},
  isbn = {978-1-4503-0041-4},
  location = {Toronto, Ontario, Canada},
  numpages = {10},
  pages = {157--166},
  publisher = {ACM},
  series = {HT '10},
  title = {Of Categorizers and Describers: An Evaluation of Quantitative Measures for Tagging Motivation},
  url = {http://doi.acm.org/10.1145/1810617.1810645},
  year = 2010
}

@inproceedings{doerfel2013analysis,
  abstract = {Since the rise of collaborative tagging systems on the web, the tag recommendation task -- suggesting suitable tags to users of such systems while they add resources to their collection -- has been tackled. However, the (offline) evaluation of tag recommendation algorithms usually suffers from difficulties like the sparseness of the data or the cold-start problem for new resources or users. Previous studies therefore often used so-called post-cores (specific subsets of the original datasets) for their experiments. In this paper, we conduct a large-scale experiment in which we analyze different tag recommendation algorithms on different cores of three real-world datasets. We show that a recommender's performance depends on the particular core and we explore correlations between performances on different cores.},
  acmid = {2507222},
  address = {New York, NY, USA},
  author = {Doerfel, Stephan and J\"{a}schke, Robert},
  booktitle = {Proceedings of the 7th ACM Conference on Recommender Systems},
  doi = {10.1145/2507157.2507222},
  interhash = {3eaf2beb1cdad39b7c5735a82c3338dd},
  intrahash = {a73213a865503252caa4b28e88a77108},
  isbn = {978-1-4503-2409-0},
  location = {Hong Kong, China},
  numpages = {4},
  pages = {343--346},
  publisher = {ACM},
  series = {RecSys '13},
  title = {An Analysis of Tag-Recommender Evaluation Procedures},
  url = {http://doi.acm.org/10.1145/2507157.2507222},
  year = 2013
}

@misc{beel2013research,
  author = {Beel, Joeran and Langer, Stefan and Genzmehr, Marcel and Gipp, Bela and Breitinger, Corinna and N\"{u}rnberger, Andreas},
  interhash = {544758b1fd737c010643f529c4f48ae6},
  intrahash = {4afa2bd342dda6b6d32713aa0fbc33bd},
  note = {Preprint},
  title = {Research Paper Recommender System Evaluation: A Quantitative Literature Survey},
  year = 2013
}

@article{thelwall2012journal,
  abstract = {In theory, the web has the potential to provide information about the wider impact of academic research, beyond traditional scholarly impact. This is because the web can reflect non-scholarly uses of research, such as in online government documents, press coverage or public discussions. Nevertheless, there are practical problems with creating metrics for journals based on web data: principally that most such metrics would be easy for journal editors or publishers to manipulate. Even so, two alternatives seem to have both promise and value: citations derived from digitised books and download counts for journals within specific delivery platforms.},
  author = {Thelwall, Mike},
  doi = {10.1007/s11192-012-0669-x},
  interhash = {834707cf0663109f7811a14ae746be72},
  intrahash = {284883bbaa636a0bab13fc54b903f363},
  issn = {0138-9130},
  journal = {Scientometrics},
  language = {English},
  number = 2,
  pages = {429--441},
  publisher = {Springer Netherlands},
  title = {Journal impact evaluation: a webometric perspective},
  url = {http://dx.doi.org/10.1007/s11192-012-0669-x},
  volume = 92,
  year = 2012
}

@inproceedings{dominguezgarcia2012freset,
  abstract = {FReSET is a new recommender-systems evaluation framework aiming to support research on folksonomy-based recommender systems. It provides interfaces for the implementation of folksonomy-based recommender systems and supports consistent and reproducible offline evaluations on historical data. Unlike other recommender-system framework projects, the emphasis here is on providing a flexible framework that allows users to implement their own folksonomy-based recommender algorithms and pre-processing filtering methods, rather than just providing a collection of collaborative filtering implementations. FReSET includes a graphical interface for result visualization and different cross-validation implementations to complement the basic functionality.},
  acmid = {2365939},
  address = {New York, NY, USA},
  author = {Dom\'{\i}nguez Garc\'{\i}a, Renato and Bender, Matthias and Anjorin, Mojisola and Rensing, Christoph and Steinmetz, Ralf},
  booktitle = {Proceedings of the 4th ACM RecSys Workshop on Recommender Systems and the Social Web},
  doi = {10.1145/2365934.2365939},
  interhash = {489207308b5d7f064163652763794ce6},
  intrahash = {c78b033eb1b463ff00c4fc67ed8bf679},
  isbn = {978-1-4503-1638-5},
  location = {Dublin, Ireland},
  numpages = {4},
  pages = {25--28},
  publisher = {ACM},
  series = {RSWeb '12},
  title = {FReSET: An Evaluation Framework for Folksonomy-based Recommender Systems},
  url = {http://doi.acm.org/10.1145/2365934.2365939},
  year = 2012
}

@inproceedings{parra2009evaluation,
  abstract = {Motivated by the potential use of collaborative tagging systems to develop new recommender systems, we have implemented and compared three variants of user-based collaborative filtering algorithms to provide recommendations of articles on CiteULike. In our first approach, Classic Collaborative Filtering (CCF), we use Pearson correlation to calculate the similarity between users and a classic adjusted-ratings formula to rank the recommendations. Our second approach, Neighbor-weighted Collaborative Filtering (NwCF), incorporates the number of raters into the ranking formula of the recommendations. A modified version of the Okapi BM25 IR model over users' tags is implemented in our third approach to form the user neighborhood. Our results suggest that incorporating the number of raters into the algorithms leads to an improvement in precision, and they also support that tags can be considered an alternative to Pearson correlation for calculating the similarity between users and their neighbors in a collaborative tagging system.},
  author = {Parra, Denis and Brusilovsky, Peter},
  booktitle = {Proceedings of the Workshop on Web 3.0: Merging Semantic Web and Social Web},
  interhash = {03a51e24ecab3ad66fcc381980144fea},
  intrahash = {42773258c36ccf2f59749991518d1784},
  issn = {1613-0073},
  location = {Torino, Italy},
  month = jun,
  series = {CEUR Workshop Proceedings},
  title = {Evaluation of Collaborative Filtering Algorithms for Recommending Articles on CiteULike},
  url = {http://ceur-ws.org/Vol-467/paper5.pdf},
  volume = 467,
  year = 2009
}

@incollection{shani2011evaluating,
  author = {Shani, Guy and Gunawardana, Asela},
  booktitle = {Recommender Systems Handbook},
  interhash = {c93599e113544cde3f44502c88775c20},
  intrahash = {63a1a401a35be851b9864966184c6815},
  pages = {257--297},
  publisher = {Springer},
  title = {Evaluating Recommendation Systems},
  year = 2011
}

@techreport{Go_Huang_Bhayani_2009,
  author = {Go, Alec and Huang, Lei and Bhayani, Richa},
  institution = {Stanford University},
  interhash = {c462bf3fa792403429b46ec83efc2d06},
  intrahash = {21e712d455a36a1125bd9bfe6c9383a8},
  note = {CS224N Project Report},
  title = {Twitter Sentiment Analysis},
  url = {http://nlp.stanford.edu/courses/cs224n/2009/fp/3.pdf},
  year = 2009
}

@article{Herlocker:2004:ECF:963770.963772,
  abstract = {Recommender systems have been evaluated in many, often incomparable, ways. In this article, we review the key decisions in evaluating collaborative filtering recommender systems: the user tasks being evaluated, the types of analysis and datasets being used, the ways in which prediction quality is measured, the evaluation of prediction attributes other than quality, and the user-based evaluation of the system as a whole. In addition to reviewing the evaluation strategies used by prior researchers, we present empirical results from the analysis of various accuracy metrics on one content domain, where all the tested metrics collapsed roughly into three equivalence classes. Metrics within each equivalence class were strongly correlated, while metrics from different equivalence classes were uncorrelated.},
  acmid = {963772},
  address = {New York, NY, USA},
  author = {Herlocker, Jonathan L. and Konstan, Joseph A. and Terveen, Loren G. and Riedl, John T.},
  doi = {10.1145/963770.963772},
  interhash = {f8a70731d983634ac7105896d101c9d2},
  intrahash = {c3a659108a568db1fba183c680dd1fd2},
  issn = {1046-8188},
  journal = {ACM Trans. Inf. Syst.},
  month = jan,
  number = 1,
  numpages = {49},
  pages = {5--53},
  publisher = {ACM},
  title = {Evaluating Collaborative Filtering Recommender Systems},
  url = {http://doi.acm.org/10.1145/963770.963772},
  volume = 22,
  year = 2004
}

@inproceedings{taraborelli2008review,
  author = {Taraborelli, Dario},
  booktitle = {Proceedings of the 8th International Conference on the Design of Cooperative Systems (COOP '08)},
  interhash = {b97c97a0b6d1aeaac597da2b2918dbfa},
  intrahash = {b496bb7eab8e8191c10e4d706fbc7c5e},
  title = {Soft Peer Review: Social Software and Distributed Scientific Evaluation},
  url = {http://nitens.org/docs/spr_coop08.pdf},
  year = 2008
}