% Bibliography database: ten entries (articles and conference papers).
%
% Conventions used in this file:
%   - one field per line, values delimited with braces;
%   - names in "Last, First" form, separated by " and ";
%   - page ranges use a double hyphen;
%   - accents are written as brace-wrapped LaTeX escapes so that classic
%     BibTeX sorts and labels them correctly;
%   - only the words that must keep their case are brace-protected in titles.
% Fields such as interhash, intrahash, acmid, numpages and issue_date are
% export metadata and are kept verbatim.

% --- Journal article -------------------------------------------------------

@article{demvsar2006statistical,
  abstract    = {While methods for comparing two learning algorithms on a single data set have been scrutinized for quite some time already, the issue of statistical tests for comparisons of more algorithms on multiple data sets, which is even more essential to typical machine learning studies, has been all but ignored. This article reviews the current practice and then theoretically and empirically examines several suitable tests. Based on that, we recommend a set of simple, yet safe and robust non-parametric tests for statistical comparisons of classifiers: the Wilcoxon signed ranks test for comparison of two classifiers and the Friedman test with the corresponding post-hoc tests for comparison of more classifiers over multiple data sets. Results of the latter can also be neatly presented with the newly introduced CD (critical difference) diagrams.},
  acmid       = {1248548},
  author      = {Dem{\v{s}}ar, Janez},
  interhash   = {337f48d386c60bd13ce70021894680ef},
  intrahash   = {93751bd0bfabffe38f799b9bb7f4c227},
  issn        = {1532-4435},
  issue_date  = {12/1/2006},
  journal     = {Journal of Machine Learning Research},
  month       = dec,
  numpages    = {30},
  pages       = {1--30},
  publisher   = {JMLR.org},
  title       = {Statistical Comparisons of Classifiers over Multiple Data Sets},
  url         = {http://dl.acm.org/citation.cfm?id=1248547.1248548},
  volume      = 7,
  year        = 2006
}

% --- Journal article -------------------------------------------------------

@article{brody2006earlier,
  abstract    = {The use of citation counts to assess the impact of research articles is well established. However, the citation impact of an article can only be measured several years after it has been published. As research articles are increasingly accessed through the Web, the number of times an article is downloaded can be instantly recorded and counted. One would expect the number of times an article is read to be related both to the number of times it is cited and to how old the article is. The authors analyze how short-term Web usage impact predicts medium-term citation impact. The physics e-print archive---arXiv.org---is used to test this.},
  author      = {Brody, Tim and Harnad, Stevan and Carr, Leslie},
  doi         = {10.1002/asi.20373},
  interhash   = {b4ae997250ae110bcc89826cb2a8205c},
  intrahash   = {643ec09ec9d1fd641c0416c3d8dde8f6},
  issn        = {1532-2890},
  journal     = {Journal of the American Society for Information Science and Technology},
  number      = 8,
  pages       = {1060--1072},
  publisher   = {Wiley Subscription Services, Inc., A Wiley Company},
  title       = {Earlier {Web} usage statistics as predictors of later citation impact},
  url         = {http://dx.doi.org/10.1002/asi.20373},
  volume      = 57,
  year        = 2006
}

% --- Conference paper ------------------------------------------------------

@inproceedings{saeed2008citation,
  abstract    = {New developments in the collaborative and participatory role of Web has emerged new web based fast lane information systems like tagging and bookmarking applications. Same authors have shown elsewhere, that for same papers tags and bookmarks appear and gain volume very quickly in time as compared to citations and also hold good correlation with the citations. Studying the rank prediction models based on these systems gives advantage of gaining quick insight and localizing the highly productive and diffusible knowledge very early in time. This shows that it may be interesting to model the citation rank of a paper within the scope of a conference or journal issue, based on the bookmark counts (i-e count representing how many researchers have shown interest in a publication.) We used linear regression model for predicting citation ranks and compared both predicted citation rank models of bookmark counts and coauthor network counts for the papers of WWW06 conference. The results show that the rank prediction model based on bookmark counts is far better than the one based on coauthor network with mean absolute error for the first limited to the range of 5 and mean absolute error for second model above 18. Along with this we also compared the two bookmark prediction models out of which one was based on total citations rank as a dependent variable and the other was based on the adjusted citation rank. The citation rank was adjusted after subtracting the self and coauthor citations from total citations. The comparison reveals a significant improvement in the model and correlation after adjusting the citation rank. This may be interpreted that the bookmarking mechanisms represents the phenomenon similar to global discovery of a publication. While in the coauthor nets the papers are communicated personally and this communication or selection may not be captured within the bookmarking systems.},
  author      = {Saeed, A. U. and Afzal, M. T. and Latif, A. and Tochtermann, K.},
  booktitle   = {Multitopic Conference, 2008. INMIC 2008. IEEE International},
  doi         = {10.1109/INMIC.2008.4777769},
  interhash   = {26d1785cab132d577e377bb5bf299002},
  intrahash   = {677fc89fef6c79a6a4f25cb25246e38a},
  month       = dec,
  pages       = {392--397},
  title       = {Citation rank prediction based on bookmark counts: Exploratory case study of {WWW06} papers},
  url         = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=4777769},
  year        = 2008
}

% --- Conference paper ------------------------------------------------------

@inproceedings{scholz2013people,
  author      = {Scholz, Christoph and Atzmueller, Martin and Kibanov, Mark and Stumme, Gerd},
  booktitle   = {ASONAM},
  interhash   = {8b6051b794789000c4baa5ab059fab18},
  intrahash   = {bf958ff2b11df1d9d15d9287ea07a5c9},
  title       = {How Do People Link? Analysis of Contact Structures in Human Face-to-Face Proximity Networks},
  year        = 2013
}

% --- Conference paper ------------------------------------------------------
% The citation key is kept as exported so existing citations keep resolving.
% The author list is split into individual names; it must not be wrapped in
% an extra pair of braces, which would make it a single indivisible name.

@inproceedings{christophscholzandmartinatzmuellerandalainbarratandcirocattutoandgerdstumme2013insights,
  address     = {Palo Alto, CA, USA},
  author      = {Scholz, Christoph and Atzmueller, Martin and Barrat, Alain and Cattuto, Ciro and Stumme, Gerd},
  booktitle   = {Proc. 7th Intl. AAAI Conference on Weblogs and Social Media},
  interhash   = {18e166abcc948c86d6d4e9de9b204760},
  intrahash   = {3dce3522e260238f2a8e802b52096cb7},
  optpages    = {To appear},
  publisher   = {AAAI Press},
  title       = {New Insights and Methods For Predicting Face-To-Face Contacts},
  year        = 2013
}

% --- Journal article -------------------------------------------------------

@article{fu2010using,
  abstract    = {The most popular method for judging the impact of biomedical articles is citation count which is the number of citations received. The most significant limitation of citation count is that it cannot evaluate articles at the time of publication since citations accumulate over time. This work presents computer models that accurately predict citation counts of biomedical publications within a deep horizon of 10 years using only predictive information available at publication time. Our experiments show that it is indeed feasible to accurately predict future citation counts with a mixture of content-based and bibliometric features using machine learning methods. The models pave the way for practical prediction of the long-term impact of publication, and their statistical analysis provides greater insight into citation behavior.},
  affiliation = {Center for Health Informatics and Bioinformatics, New York University Medical Center, 333 E. 38th St, 6th Floor, New York, NY 10016, USA},
  author      = {Fu, Lawrence D. and Aliferis, Constantin F.},
  doi         = {10.1007/s11192-010-0160-5},
  interhash   = {5502184494caab8c56056b7a9d92cb15},
  intrahash   = {e45088bdacbda5a5e8e6f293dcbca995},
  issn        = {0138-9130},
  journal     = {Scientometrics},
  keyword     = {Computer Science},
  number      = 1,
  pages       = {257--270},
  publisher   = {Akad{\'e}miai Kiad{\'o}, co-published with Springer Science+Business Media B.V., Formerly Kluwer Academic Publishers B.V.},
  title       = {Using content-based and bibliometric features for machine learning models to predict citation counts in the biomedical literature},
  url         = {http://dx.doi.org/10.1007/s11192-010-0160-5},
  volume      = 85,
  year        = 2010
}

% --- Conference paper ------------------------------------------------------
% The percent sign and the superscript in the abstract are written in
% LaTeX-safe form so the abstract can be typeset without errors.

@inproceedings{Yan:2012:BSS:2232817.2232831,
  abstract    = {Usually scientists breed research ideas inspired by previous publications, but they are unlikely to follow all publications in the unbounded literature collection. The volume of literature keeps on expanding extremely fast, whilst not all papers contribute equal impact to the academic society. Being aware of potentially influential literature would put one in an advanced position in choosing important research references. Hence, estimation of potential influence is of great significance. We study a challenging problem of identifying potentially influential literature. We examine a set of hypotheses on what are the fundamental characteristics for highly cited papers and find some interesting patterns. Based on these observations, we learn to identify potentially influential literature via Future Influence Prediction (FIP), which aims to estimate the future influence of literature. The system takes a series of features of a particular publication as input and produces as output the estimated citation counts of that article after a given time period. We consider several regression models to formulate the learning process and evaluate their performance based on the coefficient of determination (R2). Experimental results on a real-large data set show a mean average predictive performance of 83.6\% measured in $R^2$. We apply the learned model to the application of bibliography recommendation and obtain prominent performance improvement in terms of Mean Average Precision (MAP).},
  acmid       = {2232831},
  address     = {New York, NY, USA},
  author      = {Yan, Rui and Huang, Congrui and Tang, Jie and Zhang, Yan and Li, Xiaoming},
  booktitle   = {Proceedings of the 12th ACM/IEEE-CS joint conference on Digital Libraries},
  doi         = {10.1145/2232817.2232831},
  interhash   = {85d10c6d37bcbfa057c51acc325a8116},
  intrahash   = {9269d2dd9bf4bc8c0e7c668011fcfc1b},
  isbn        = {978-1-4503-1154-0},
  location    = {Washington, DC, USA},
  numpages    = {10},
  pages       = {51--60},
  publisher   = {ACM},
  series      = {JCDL '12},
  title       = {To better stand on the shoulder of giants},
  url         = {http://doi.acm.org/10.1145/2232817.2232831},
  year        = 2012
}

% --- Conference paper ------------------------------------------------------

@inproceedings{yan2011citation,
  abstract    = {In most of the cases, scientists depend on previous literature which is relevant to their research fields for developing new ideas. However, it is not wise, nor possible, to track all existed publications because the volume of literature collection grows extremely fast. Therefore, researchers generally follow, or cite merely a small proportion of publications which they are interested in. For such a large collection, it is rather interesting to forecast which kind of literature is more likely to attract scientists' response. In this paper, we use the citations as a measurement for the popularity among researchers and study the interesting problem of Citation Count Prediction (CCP) to examine the characteristics for popularity. Estimation of possible popularity is of great significance and is quite challenging. We have utilized several features of fundamental characteristics for those papers that are highly cited and have predicted the popularity degree of each literature in the future. We have implemented a system which takes a series of features of a particular publication as input and produces as output the estimated citation counts of that article after a given time period. We consider several regression models to formulate the learning process and evaluate their performance based on the coefficient of determination (R-square). Experimental results on a real-large data set show that the best predictive model achieves a mean average predictive performance of 0.740 measured in R-square, which significantly outperforms several alternative algorithms.},
  acmid       = {2063757},
  address     = {New York, NY, USA},
  author      = {Yan, Rui and Tang, Jie and Liu, Xiaobing and Shan, Dongdong and Li, Xiaoming},
  booktitle   = {Proceedings of the 20th ACM international conference on Information and knowledge management},
  doi         = {10.1145/2063576.2063757},
  interhash   = {71ec0933a36df3dd21f38285bdf9b1b0},
  intrahash   = {b0caabb6e17d9b790d3f13c897330aad},
  isbn        = {978-1-4503-0717-8},
  location    = {Glasgow, Scotland, UK},
  numpages    = {6},
  pages       = {1247--1252},
  publisher   = {ACM},
  series      = {CIKM '11},
  title       = {Citation count prediction: learning to estimate future citations for literature},
  url         = {http://doi.acm.org/10.1145/2063576.2063757},
  year        = 2011
}

% --- Conference paper ------------------------------------------------------

@inproceedings{SAS:12,
  address     = {Boston, MA, USA},
  author      = {Scholz, Christoph and Atzmueller, Martin and Stumme, Gerd},
  booktitle   = {Proc. Fourth ASE/IEEE International Conference on Social Computing (SocialCom)},
  interhash   = {9bc5d42018dbe8b926be214190258b3c},
  intrahash   = {be5ae4b92170e7c595f5fdcac15b4786},
  publisher   = {IEEE Computer Society},
  title       = {On the Predictability of Human Contacts: Influence Factors and the Strength of Stronger Ties},
  url         = {http://www.kde.cs.uni-kassel.de/atzmueller/paper/scholz-on-f2f-predictability-socialcom-2012.pdf},
  year        = 2012
}

% --- Journal article -------------------------------------------------------

@article{shannon1951prediction,
  author      = {Shannon, Claude Elwood},
  interhash   = {daabc21c7f6e71f6e78a10c8d3492927},
  intrahash   = {2e79cf0f6022645a632b13e081b0b035},
  journal     = {Bell System Technical Journal},
  month       = jan,
  pages       = {50--64},
  title       = {Prediction and Entropy of Printed {English}},
  url         = {http://languagelog.ldc.upenn.edu/myl/Shannon1950.pdf},
  volume      = 30,
  year        = 1951
}