% Bibliography database: five entries on prediction topics (citation counts,
% human mobility, event-based network data, social tags).
% Conventions used below: one field per line, brace-delimited values,
% page ranges written with "--", DOIs stored bare (no resolver prefix),
% non-ASCII letters written as brace-wrapped LaTeX accent escapes, and
% LaTeX specials in free text escaped so abstracts can be typeset safely.
% NOTE(review): interhash/intrahash look like identifiers added by the
% exporting tool; they are kept verbatim -- confirm before removing.

% Journal article (Scientometrics, 2010): predicting citation counts.
@article{fu2010using,
  abstract    = {The most popular method for judging the impact of biomedical articles is citation count which is the number of citations received. The most significant limitation of citation count is that it cannot evaluate articles at the time of publication since citations accumulate over time. This work presents computer models that accurately predict citation counts of biomedical publications within a deep horizon of 10 years using only predictive information available at publication time. Our experiments show that it is indeed feasible to accurately predict future citation counts with a mixture of content-based and bibliometric features using machine learning methods. The models pave the way for practical prediction of the long-term impact of publication, and their statistical analysis provides greater insight into citation behavior.},
  affiliation = {Center for Health Informatics and Bioinformatics, New York University Medical Center, 333 E. 38th St, 6th Floor, New York, NY 10016, USA},
  author      = {Fu, Lawrence D. and Aliferis, Constantin F.},
  doi         = {10.1007/s11192-010-0160-5},
  interhash   = {5502184494caab8c56056b7a9d92cb15},
  intrahash   = {e45088bdacbda5a5e8e6f293dcbca995},
  issn        = {0138-9130},
  journal     = {Scientometrics},
  keyword     = {Computer Science},
  number      = {1},
  pages       = {257--270},
  publisher   = {Akad{\'e}miai Kiad{\'o}, co-published with Springer Science+Business Media B.V., Formerly Kluwer Academic Publishers B.V.},
  title       = {Using content-based and bibliometric features for machine learning models to predict citation counts in the biomedical literature},
  url         = {http://dx.doi.org/10.1007/s11192-010-0160-5},
  volume      = {85},
  year        = {2010}
}

% Conference paper (JCDL '12): future influence prediction for literature.
@inproceedings{Yan:2012:BSS:2232817.2232831,
  abstract    = {Usually scientists breed research ideas inspired by previous publications, but they are unlikely to follow all publications in the unbounded literature collection. The volume of literature keeps on expanding extremely fast, whilst not all papers contribute equal impact to the academic society. Being aware of potentially influential literature would put one in an advanced position in choosing important research references. Hence, estimation of potential influence is of great significance. We study a challenging problem of identifying potentially influential literature. We examine a set of hypotheses on what are the fundamental characteristics for highly cited papers and find some interesting patterns. Based on these observations, we learn to identify potentially influential literature via Future Influence Prediction (FIP), which aims to estimate the future influence of literature. The system takes a series of features of a particular publication as input and produces as output the estimated citation counts of that article after a given time period. We consider several regression models to formulate the learning process and evaluate their performance based on the coefficient of determination ($R^2$). Experimental results on a real-large data set show a mean average predictive performance of 83.6\% measured in $R^2$. We apply the learned model to the application of bibliography recommendation and obtain prominent performance improvement in terms of Mean Average Precision (MAP).},
  acmid       = {2232831},
  address     = {New York, NY, USA},
  author      = {Yan, Rui and Huang, Congrui and Tang, Jie and Zhang, Yan and Li, Xiaoming},
  booktitle   = {Proceedings of the 12th {ACM/IEEE-CS} joint conference on Digital Libraries},
  doi         = {10.1145/2232817.2232831},
  interhash   = {85d10c6d37bcbfa057c51acc325a8116},
  intrahash   = {9269d2dd9bf4bc8c0e7c668011fcfc1b},
  isbn        = {978-1-4503-1154-0},
  location    = {Washington, DC, USA},
  numpages    = {10},
  pages       = {51--60},
  publisher   = {ACM},
  series      = {JCDL '12},
  title       = {To better stand on the shoulder of giants},
  url         = {http://doi.acm.org/10.1145/2232817.2232831},
  year        = {2012}
}

% Journal article (Science, 2010): predictability of human mobility.
% NOTE(review): the eprint field holds a full-text PDF URL rather than an
% archive identifier; kept as exported -- confirm whether it should move.
@article{Song19022010,
  abstract    = {A range of applications, from predicting the spread of human and electronic viruses to city planning and resource management in mobile communications, depend on our ability to foresee the whereabouts and mobility of individuals, raising a fundamental question: To what degree is human behavior predictable? Here we explore the limits of predictability in human dynamics by studying the mobility patterns of anonymized mobile phone users. By measuring the entropy of each individual's trajectory, we find a 93\% potential predictability in user mobility across the whole user base. Despite the significant differences in the travel patterns, we find a remarkable lack of variability in predictability, which is largely independent of the distance users cover on a regular basis.},
  author      = {Song, Chaoming and Qu, Zehui and Blumm, Nicholas and Barab{\'a}si, Albert-L{\'a}szl{\'o}},
  doi         = {10.1126/science.1177170},
  eprint      = {http://www.sciencemag.org/content/327/5968/1018.full.pdf},
  interhash   = {f2611a08bf6db54f86e884c05f3cb5fb},
  intrahash   = {a89330f8eb32ce62b5f5c9a2b4909f25},
  journal     = {Science},
  number      = {5968},
  pages       = {1018--1021},
  title       = {Limits of Predictability in Human Mobility},
  url         = {http://www.sciencemag.org/content/327/5968/1018.abstract},
  volume      = {327},
  year        = {2010}
}

% Journal article (SIGKDD Explor. Newsl., 2005): temporal link prediction
% and node ranking on event-based network data.
@article{1117458,
  abstract    = {Event-based network data consists of sets of events over time, each of which may involve multiple entities. Examples include email traffic, telephone calls, and research publications (interpreted as co-authorship events). Traditional network analysis techniques, such as social network models, often aggregate the relational information from each event into a single static network. In contrast, in this paper we focus on the temporal nature of such data. In particular, we look at the problems of temporal link prediction and node ranking, and describe new methods that illustrate opportunities for data mining and machine learning techniques in this context. Experimental results are discussed for a large set of co-authorship events measured over multiple years, and a large corporate email data set spanning 21 months.},
  address     = {New York, NY, USA},
  author      = {O'Madadhain, Joshua and Hutchins, Jon and Smyth, Padhraic},
  doi         = {10.1145/1117454.1117458},
  interhash   = {97a718ab9fe24625f7389939d2608d31},
  intrahash   = {89a23b31a476c4f3f771b5e3e4a8432c},
  issn        = {1931-0145},
  journal     = {SIGKDD Explor. Newsl.},
  number      = {2},
  pages       = {23--30},
  publisher   = {ACM},
  title       = {Prediction and ranking algorithms for event-based network data},
  url         = {http://portal.acm.org/citation.cfm?id=1117458},
  volume      = {7},
  year        = {2005}
}

% Conference paper (SIGIR '08): predicting social bookmarking tags.
@inproceedings{heymann2008social,
  abstract    = {In this paper, we look at the ``social tag prediction'' problem. Given a set of objects, and a set of tags applied to those objects by users, can we predict whether a given tag could/should be applied to a particular object? We investigated this question using one of the largest crawls of the social bookmarking system del.icio.us gathered to date. For URLs in del.icio.us, we predicted tags based on page text, anchor text, surrounding hosts, and other tags applied to the URL. We found an entropy-based metric which captures the generality of a particular tag and informs an analysis of how well that tag can be predicted. We also found that tag-based association rules can produce very high-precision predictions as well as giving deeper understanding into the relationships between tags. Our results have implications for both the study of tagging systems as potential information retrieval tools, and for the design of such systems.},
  address     = {New York, NY, USA},
  author      = {Heymann, Paul and Ramage, Daniel and Garcia-Molina, Hector},
  booktitle   = {{SIGIR} '08: Proceedings of the 31st Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
  doi         = {10.1145/1390334.1390425},
  interhash   = {bb9455c80cc9bd8cf95c951a1318dabc},
  intrahash   = {0e6023e192f539fe4fce9894b1fbca5a},
  isbn        = {978-1-60558-164-4},
  location    = {Singapore, Singapore},
  pages       = {531--538},
  publisher   = {ACM},
  title       = {Social tag prediction},
  url         = {http://portal.acm.org/citation.cfm?id=1390334.1390425},
  year        = {2008}
}