% Citation-count prediction references.
% Layout: one field per line; bibliographic fields first, then identifiers,
% then export metadata (abstract, acmid, interhash, intrahash).
% Notes live outside the entries because BibTeX ignores text between entries
% but has no comment syntax inside an entry.

% Journal article: Fu and Aliferis, Scientometrics 85(1), 2010.
% The publisher name uses LaTeX accent escapes so it is safe under 8-bit BibTeX.
@article{fu2010using,
  author      = {Fu, Lawrence D. and Aliferis, Constantin F.},
  title       = {Using content-based and bibliometric features for machine learning models to predict citation counts in the biomedical literature},
  journal     = {Scientometrics},
  volume      = {85},
  number      = {1},
  pages       = {257--270},
  year        = {2010},
  publisher   = {Akad{\'e}miai Kiad{\'o}, co-published with Springer Science+Business Media B.V., Formerly Kluwer Academic Publishers B.V.},
  issn        = {0138-9130},
  doi         = {10.1007/s11192-010-0160-5},
  url         = {http://dx.doi.org/10.1007/s11192-010-0160-5},
  affiliation = {Center for Health Informatics and Bioinformatics, New York University Medical Center, 333 E. 38th St, 6th Floor, New York, NY 10016, USA},
  keywords    = {Computer Science},
  abstract    = {The most popular method for judging the impact of biomedical articles is citation count which is the number of citations received. The most significant limitation of citation count is that it cannot evaluate articles at the time of publication since citations accumulate over time. This work presents computer models that accurately predict citation counts of biomedical publications within a deep horizon of 10 years using only predictive information available at publication time. Our experiments show that it is indeed feasible to accurately predict future citation counts with a mixture of content-based and bibliometric features using machine learning methods. The models pave the way for practical prediction of the long-term impact of publication, and their statistical analysis provides greater insight into citation behavior.},
  interhash   = {5502184494caab8c56056b7a9d92cb15},
  intrahash   = {e45088bdacbda5a5e8e6f293dcbca995},
}

% Conference paper: Yan et al., JCDL '12 (Future Influence Prediction).
% In the abstract the percent sign is escaped and R^2 is set in math mode so
% the text stays valid LaTeX if a style prints the abstract.
@inproceedings{Yan:2012:BSS:2232817.2232831,
  author    = {Yan, Rui and Huang, Congrui and Tang, Jie and Zhang, Yan and Li, Xiaoming},
  title     = {To better stand on the shoulder of giants},
  booktitle = {Proceedings of the 12th ACM/IEEE-CS joint conference on Digital Libraries},
  series    = {JCDL '12},
  pages     = {51--60},
  numpages  = {10},
  year      = {2012},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Washington, DC, USA},
  isbn      = {978-1-4503-1154-0},
  doi       = {10.1145/2232817.2232831},
  url       = {http://doi.acm.org/10.1145/2232817.2232831},
  acmid     = {2232831},
  abstract  = {Usually scientists breed research ideas inspired by previous publications, but they are unlikely to follow all publications in the unbounded literature collection. The volume of literature keeps on expanding extremely fast, whilst not all papers contribute equal impact to the academic society. Being aware of potentially influential literature would put one in an advanced position in choosing important research references. Hence, estimation of potential influence is of great significance. We study a challenging problem of identifying potentially influential literature. We examine a set of hypotheses on what are the fundamental characteristics for highly cited papers and find some interesting patterns. Based on these observations, we learn to identify potentially influential literature via Future Influence Prediction (FIP), which aims to estimate the future influence of literature. The system takes a series of features of a particular publication as input and produces as output the estimated citation counts of that article after a given time period. We consider several regression models to formulate the learning process and evaluate their performance based on the coefficient of determination ($R^2$). Experimental results on a real-large data set show a mean average predictive performance of 83.6\% measured in $R^2$. We apply the learned model to the application of bibliography recommendation and obtain prominent performance improvement in terms of Mean Average Precision (MAP).},
  interhash = {85d10c6d37bcbfa057c51acc325a8116},
  intrahash = {9269d2dd9bf4bc8c0e7c668011fcfc1b},
}

% Conference paper: Yan et al., CIKM '11 (Citation Count Prediction).
@inproceedings{yan2011citation,
  author    = {Yan, Rui and Tang, Jie and Liu, Xiaobing and Shan, Dongdong and Li, Xiaoming},
  title     = {Citation count prediction: learning to estimate future citations for literature},
  booktitle = {Proceedings of the 20th ACM international conference on Information and knowledge management},
  series    = {CIKM '11},
  pages     = {1247--1252},
  numpages  = {6},
  year      = {2011},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Glasgow, Scotland, UK},
  isbn      = {978-1-4503-0717-8},
  doi       = {10.1145/2063576.2063757},
  url       = {http://doi.acm.org/10.1145/2063576.2063757},
  acmid     = {2063757},
  abstract  = {In most of the cases, scientists depend on previous literature which is relevant to their research fields for developing new ideas. However, it is not wise, nor possible, to track all existed publications because the volume of literature collection grows extremely fast. Therefore, researchers generally follow, or cite merely a small proportion of publications which they are interested in. For such a large collection, it is rather interesting to forecast which kind of literature is more likely to attract scientists' response. In this paper, we use the citations as a measurement for the popularity among researchers and study the interesting problem of Citation Count Prediction (CCP) to examine the characteristics for popularity. Estimation of possible popularity is of great significance and is quite challenging. We have utilized several features of fundamental characteristics for those papers that are highly cited and have predicted the popularity degree of each literature in the future. We have implemented a system which takes a series of features of a particular publication as input and produces as output the estimated citation counts of that article after a given time period. We consider several regression models to formulate the learning process and evaluate their performance based on the coefficient of determination (R-square). Experimental results on a real-large data set show that the best predictive model achieves a mean average predictive performance of 0.740 measured in R-square, which significantly outperforms several alternative algorithms.},
  interhash = {71ec0933a36df3dd21f38285bdf9b1b0},
  intrahash = {b0caabb6e17d9b790d3f13c897330aad},
}