@comment{
  References on predicting citation counts and citation impact
  (usage statistics, bookmark counts, bibliometric features, h index).
  Conventions: one field per line, page ranges written with a double
  hyphen, months given as the standard three-letter macros, and
  case-sensitive words in titles protected with braces.
}

@article{brody2006earlier,
  abstract  = {The use of citation counts to assess the impact of research articles is well established. However, the citation impact of an article can only be measured several years after it has been published. As research articles are increasingly accessed through the Web, the number of times an article is downloaded can be instantly recorded and counted. One would expect the number of times an article is read to be related both to the number of times it is cited and to how old the article is. The authors analyze how short-term Web usage impact predicts medium-term citation impact. The physics e-print archive---arXiv.org---is used to test this.},
  author    = {Brody, Tim and Harnad, Stevan and Carr, Leslie},
  doi       = {10.1002/asi.20373},
  interhash = {b4ae997250ae110bcc89826cb2a8205c},
  intrahash = {643ec09ec9d1fd641c0416c3d8dde8f6},
  issn      = {1532-2890},
  journal   = {Journal of the American Society for Information Science and Technology},
  number    = 8,
  pages     = {1060--1072},
  publisher = {Wiley Subscription Services, Inc., A Wiley Company},
  title     = {Earlier {Web} usage statistics as predictors of later citation impact},
  url       = {http://dx.doi.org/10.1002/asi.20373},
  volume    = 57,
  year      = 2006
}

@inproceedings{saeed2008citation,
  abstract  = {New developments in the collaborative and participatory role of Web has emerged new web based fast lane information systems like tagging and bookmarking applications. Same authors have shown elsewhere, that for same papers tags and bookmarks appear and gain volume very quickly in time as compared to citations and also hold good correlation with the citations. Studying the rank prediction models based on these systems gives advantage of gaining quick insight and localizing the highly productive and diffusible knowledge very early in time. This shows that it may be interesting to model the citation rank of a paper within the scope of a conference or journal issue, based on the bookmark counts (i-e count representing how many researchers have shown interest in a publication.) We used linear regression model for predicting citation ranks and compared both predicted citation rank models of bookmark counts and coauthor network counts for the papers of WWW06 conference. The results show that the rank prediction model based on bookmark counts is far better than the one based on coauthor network with mean absolute error for the first limited to the range of 5 and mean absolute error for second model above 18. Along with this we also compared the two bookmark prediction models out of which one was based on total citations rank as a dependent variable and the other was based on the adjusted citation rank. The citation rank was adjusted after subtracting the self and coauthor citations from total citations. The comparison reveals a significant improvement in the model and correlation after adjusting the citation rank. This may be interpreted that the bookmarking mechanisms represents the phenomenon similar to global discovery of a publication. While in the coauthor nets the papers are communicated personally and this communication or selection may not be captured within the bookmarking systems.},
  author    = {Saeed, A. U. and Afzal, M. T. and Latif, A. and Tochtermann, K.},
  booktitle = {Multitopic Conference, 2008. INMIC 2008. IEEE International},
  doi       = {10.1109/INMIC.2008.4777769},
  interhash = {26d1785cab132d577e377bb5bf299002},
  intrahash = {677fc89fef6c79a6a4f25cb25246e38a},
  month     = dec,
  pages     = {392--397},
  title     = {Citation rank prediction based on bookmark counts: Exploratory case study of {WWW06} papers},
  url       = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=4777769},
  year      = 2008
}

@article{fu2008models,
  abstract  = {The single most important bibliometric criterion for judging the impact of biomedical papers and their authors work is the number of citations received which is commonly referred to as citation count. This metric however is unavailable until several years after publication time. In the present work, we build computer models that accurately predict citation counts of biomedical publications within a deep horizon of ten years using only predictive information available at publication time. Our experiments show that it is indeed feasible to accurately predict future citation counts with a mixture of content-based and bibliometric features using machine learning methods. The models pave the way for practical prediction of the long-term impact of publication, and their statistical analysis provides greater insight into citation behavior.},
  author    = {Fu, Lawrence D. and Aliferis, Constantin},
  interhash = {1eb972fa9ba9e255d6889b01532ea767},
  intrahash = {39d155a532108bc71437451e31287943},
  journal   = {AMIA Annual Symposium Proceedings},
  pages     = {222--226},
  pmid      = {18999029},
  title     = {Models for predicting and explaining citation count of biomedical articles},
  url       = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2656101/},
  year      = 2008
}

@article{lokker2008prediction,
  author    = {Lokker, Cynthia and McKibbon, K Ann and McKinlay, R James and Wilczynski, Nancy L and Haynes, R Brian},
  doi       = {10.1136/bmj.39482.526713.BE},
  interhash = {f5f066ee09051d862c1a1c9f34a832c0},
  intrahash = {dece3577294846d48f198a6a5e6425c2},
  journal   = {BMJ},
  month     = mar,
  number    = 7645,
  pages     = {655--657},
  title     = {Prediction of citation counts for clinical articles at two years using data available within three weeks of publication: retrospective cohort study},
  volume    = 336,
  year      = 2008
}

@article{hirsch2007index,
  abstract  = {Bibliometric measures of individual scientific achievement are of particular interest if they can be used to predict future achievement. Here we report results of an empirical study of the predictive power of the h index compared with other indicators. Our findings indicate that the h index is better than other indicators considered (total citation count, citations per paper, and total paper count) in predicting future scientific achievement. We discuss reasons for the superiority of the h index.},
  author    = {Hirsch, J. E.},
  doi       = {10.1073/pnas.0707962104},
  eprint    = {http://www.pnas.org/content/104/49/19193.full.pdf+html},
  interhash = {9bc6518ef60bb256ca78287a6c349f05},
  intrahash = {43caaad4f117fc3f5c14d83b9082448e},
  journal   = {Proceedings of the National Academy of Sciences},
  number    = 49,
  pages     = {19193--19198},
  title     = {Does the h index have predictive power?},
  url       = {http://www.pnas.org/content/104/49/19193.abstract},
  volume    = 104,
  year      = 2007
}

@inproceedings{yan2011citation,
  abstract  = {In most of the cases, scientists depend on previous literature which is relevant to their research fields for developing new ideas. However, it is not wise, nor possible, to track all existed publications because the volume of literature collection grows extremely fast. Therefore, researchers generally follow, or cite merely a small proportion of publications which they are interested in. For such a large collection, it is rather interesting to forecast which kind of literature is more likely to attract scientists' response. In this paper, we use the citations as a measurement for the popularity among researchers and study the interesting problem of Citation Count Prediction (CCP) to examine the characteristics for popularity. Estimation of possible popularity is of great significance and is quite challenging. We have utilized several features of fundamental characteristics for those papers that are highly cited and have predicted the popularity degree of each literature in the future. We have implemented a system which takes a series of features of a particular publication as input and produces as output the estimated citation counts of that article after a given time period. We consider several regression models to formulate the learning process and evaluate their performance based on the coefficient of determination (R-square). Experimental results on a real-large data set show that the best predictive model achieves a mean average predictive performance of 0.740 measured in R-square, which significantly outperforms several alternative algorithms.},
  acmid     = {2063757},
  address   = {New York, NY, USA},
  author    = {Yan, Rui and Tang, Jie and Liu, Xiaobing and Shan, Dongdong and Li, Xiaoming},
  booktitle = {Proceedings of the 20th ACM international conference on Information and knowledge management},
  doi       = {10.1145/2063576.2063757},
  interhash = {71ec0933a36df3dd21f38285bdf9b1b0},
  intrahash = {b0caabb6e17d9b790d3f13c897330aad},
  isbn      = {978-1-4503-0717-8},
  location  = {Glasgow, Scotland, UK},
  numpages  = {6},
  pages     = {1247--1252},
  publisher = {ACM},
  series    = {CIKM '11},
  title     = {Citation count prediction: learning to estimate future citations for literature},
  url       = {http://doi.acm.org/10.1145/2063576.2063757},
  year      = 2011
}