@article{brzezinski2015power, abstract = {Modeling distributions of citations to scientific papers is crucial for understanding how science develops. However, there is a considerable empirical controversy on which statistical model fits the citation distributions best. This paper is concerned with rigorous empirical detection of power-law behaviour in the distribution of citations received by the most highly cited scientific papers. We have used a large, novel data set on citations to scientific papers published between 1998 and 2002 drawn from Scopus. The power-law model is compared with a number of alternative models using a likelihood ratio test. We have found that the power-law hypothesis is rejected for around half of the Scopus fields of science. For these fields of science, the Yule, power-law with exponential cut-off and log-normal distributions seem to fit the data better than the pure power-law model. On the other hand, when the power-law hypothesis is not rejected, it is usually empirically indistinguishable from most of the alternative models. The pure power-law model seems to be the best model only for the most highly cited papers in “Physics and Astronomy”. Overall, our results seem to support theories implying that the most highly cited scientific papers follow the Yule, power-law with exponential cut-off or log-normal distribution. Our findings suggest also that power laws in citation distributions, when present, account only for a very small fraction of the published papers (less than 1 % for most of science fields) and that the power-law scaling parameter (exponent) is substantially higher (from around 3.2 to around 4.7) than found in the older literature.}, author = {Brzezinski, Michal}, doi = {10.1007/s11192-014-1524-z}, interhash = {b162eddb3ff76a9eef5daf450da934c0}, intrahash = {8ef9a6fbfcca3d599ca500cf4f9a2e39}, issn = {0138-9130}, journal = {Scientometrics}, language = {English}, number = 1, pages = {213-228}, publisher = {Springer Netherlands}, title = {Power laws in citation distributions: evidence from Scopus}, url = {http://dx.doi.org/10.1007/s11192-014-1524-z}, volume = 103, year = 2015 } @article{albarrn2011references, abstract = {This article studies massive evidence about references made and citations received after a 5-year citation window by 3.7 million articles published in 1998 to 2002 in 22 scientific fields. We find that the distributions of references made and citations received share a number of basic features across sciences. Reference distributions are rather skewed to the right while citation distributions are even more highly skewed: The mean is about 20 percentage points to the right of the median, and articles with a remarkable or an outstanding number of citations represent about 9% of the total. Moreover, the existence of a power law representing the upper tail of citation distributions cannot be rejected in 17 fields whose articles represent 74.7% of the total. Contrary to the evidence in other contexts, the value of the scale parameter is above 3.5 in 13 of the 17 cases. 
Finally, power laws are typically small, but capture a considerable proportion of the total citations received.}, author = {Albarrán, Pedro and Ruiz-Castillo, Javier}, doi = {10.1002/asi.21448}, interhash = {79502663727fcbd4834a423f4e3212a3}, intrahash = {f20e50e960696bab3b39b628718dd850}, issn = {1532-2890}, journal = {Journal of the American Society for Information Science and Technology}, number = 1, pages = {40--49}, publisher = {Wiley Subscription Services, Inc., A Wiley Company}, title = {References made and citations received by scientific articles}, url = {http://dx.doi.org/10.1002/asi.21448}, volume = 62, year = 2011 } @article{cerinek2015network, abstract = {We analyze the data about works (papers, books) from the time period 1990–2010 that are collected in Zentralblatt MATH database. The data were converted into four 2-mode networks (works }, author = {Cerinšek, Monika and Batagelj, Vladimir}, doi = {10.1007/s11192-014-1419-z}, interhash = {e65f748684210857bb19dc7f69d65f86}, intrahash = {bcba93fd0e6381289c489cbab20bbec7}, issn = {0138-9130}, journal = {Scientometrics}, language = {English}, number = 1, pages = {977-1001}, publisher = {Springer Netherlands}, title = {Network analysis of Zentralblatt MATH data}, url = {http://dx.doi.org/10.1007/s11192-014-1419-z}, volume = 102, year = 2015 } @article{bonzi1991motivations, abstract = {The citation motivations among 51 self citing authors in several natural science disciplines were investigated. Results of a survey on reasons for both self citation and citation to others show that there are very few differences in motivation, and that there are plausible intellectual grounds for those differences which are substantial. Analysis of exposure in text reveals virtually no differences between self citations and citations to others. Analysis of individual disciplines also uncover no substantive differences in either motivation or exposure in text.}, author = {Bonzi, Susan and Snyder, H.W.}, doi = {10.1007/BF02017571}, interhash = {b531a253fae4751735918d6d5c8b44bd}, intrahash = {fcd88cce5ca6a7c99cb4726921752a1b}, issn = {0138-9130}, journal = {Scientometrics}, language = {English}, number = 2, pages = {245-254}, publisher = {Kluwer Academic Publishers}, title = {Motivations for citation: A comparison of self citation and citation to others}, url = {http://dx.doi.org/10.1007/BF02017571}, volume = 21, year = 1991 } @article{phelan1999compendium, abstract = {This paper examines a number of the criticisms that citation analysis has been subjected to over the years. It is argued that many of these criticisms have been based on only limited examinations of data in particular contexts and it remains unclear how broadly applicable these problems are to research conducted at different levels of analysis, in specific field, and among various national data sets. Relevant evidence is provided from analysis of Australian and international data. 
}, author = {Phelan, Thomas J.}, doi = {10.1007/BF02458472}, interhash = {a8e468c0850ef735517484b121e30630}, intrahash = {a9d0ef4078c380cb07619a545ed4144d}, issn = {0138-9130}, journal = {Scientometrics}, language = {English}, number = 1, pages = {117-136}, publisher = {Kluwer Academic Publishers}, title = {A compendium of issues for citation analysis}, url = {http://dx.doi.org/10.1007/BF02458472}, volume = 45, year = 1999 } @article{bornmann2008citation, abstract = {Purpose – The purpose of this paper is to present a narrative review of studies on the citing behavior of scientists, covering mainly research published in the last 15 years. Based on the results of these studies, the paper seeks to answer the question of the extent to which scientists are motivated to cite a publication not only to acknowledge intellectual and cognitive influences of scientific peers, but also for other, possibly non‐scientific, reasons.Design/methodology/approach – The review covers research published from the early 1960s up to mid‐2005 (approximately 30 studies on citing behavior‐reporting results in about 40 publications).Findings – The general tendency of the results of the empirical studies makes it clear that citing behavior is not motivated solely by the wish to acknowledge intellectual and cognitive influences of colleague scientists, since the individual studies reveal also other, in part non‐scientific, factors that play a part in the decision to cite. However, the results of the studies must also be deemed scarcely reliable: the studies vary widely in design, and their results can hardly be replicated. Many of the studies have methodological weaknesses. Furthermore, there is evidence that the different motivations of citers are “not so different or ‘randomly given’ to such an extent that the phenomenon of citation would lose its role as a reliable measure of impact”.Originality/value – Given the increasing importance of evaluative bibliometrics in the world of scholarship, the question “What do citation counts measure?” is a particularly relevant and topical issue. }, author = {Bornmann, Lutz and Daniel, Hans‐Dieter}, doi = {10.1108/00220410810844150}, eprint = {http://dx.doi.org/10.1108/00220410810844150}, interhash = {ef016be783f4956817cded258543ece3}, intrahash = {544d3243f7c7327b946292a80f9b6451}, journal = {Journal of Documentation}, number = 1, pages = {45-80}, title = {What do citation counts measure? A review of studies on citing behavior}, url = {http://dx.doi.org/10.1108/00220410810844150 }, volume = 64, year = 2008 } @article{haley2014ranking, abstract = {Recently, Harzing's Publish or Perish software was updated to include Microsoft Academic Search as a second citation database search option for computing various citation-based metrics. This article explores the new search option by scoring 50 top economics and finance journals and comparing them with the results obtained using the original Google Scholar-based search option. The new database delivers significantly smaller scores for all metrics, but the rank correlations across the two databases for the h-index, g-index, AWCR, and e-index are significantly correlated, especially when the time frame is restricted to more recent years. Comparisons are also made to the Article Influence score from eigenfactor.org and to the RePEc h-index, both of which adjust for journal-level self-citations.}, author = {Haley, M. 
Ryan}, doi = {10.1002/asi.23080}, interhash = {4c6796cff62fe5c8a8cf638f9785cd14}, intrahash = {29feb827b9f64fa5828eb4e6298d38f7}, issn = {2330-1643}, journal = {Journal of the Association for Information Science and Technology}, number = 5, pages = {1079--1084}, title = {Ranking top economics and finance journals using Microsoft Academic Search versus Google Scholar: How does the new Publish or Perish option compare?}, url = {http://dx.doi.org/10.1002/asi.23080}, volume = 65, year = 2014 } @inproceedings{peters2011crowdsourcing, abstract = {Qualitative journal evaluation makes use of cumulated content descriptions of single articles. These can either be represented by author-generated keywords, professionally indexed subject headings, automatically extracted terms or by reader-generated tags as used in social bookmarking systems. It is assumed that particularly the users' view on article content differs significantly from the authors' or indexers' perspectives. To verify this assumption, title and abstract terms, author keywords, Inspec subject headings, KeyWords Plus™ and tags are compared by calculating the overlap between the respective datasets. Our approach includes extensive term preprocessing (i.e. stemming, spelling unifications) to gain a homogeneous term collection. When term overlap is calculated for every single document of the dataset, similarity values are low. Thus, the presented study confirms the assumption, that the different types of keywords each reflect a different perspective of the articles' contents and that tags (cumulated across articles) can be used in journal evaluation to represent a reader-specific view on published content.}, author = {Peters, Isabella and Haustein, Stefanie and Terliesner, Jens}, booktitle = {ACM WebSci'11}, interhash = {def78a2b12565187bcac0cf08089b7a1}, intrahash = {8e03cf8d57f903da395c07e9a9125f08}, month = {June}, note = {WebSci Conference 2011}, pages = {1--4}, title = {Crowdsourcing in Article Evaluation}, url = {http://journal.webscience.org/487/}, year = 2011 } @article{yan2011spread, abstract = {
The presence of web-based communities is a distinctive signature of Web 2.0. The web-based feature means that information propagation within each community is highly facilitated, promoting complex collective dynamics in view of information exchange. In this work, we focus on a community of scientists and study, in particular, how the awareness of a scientific paper is spread. Our work is based on the web usage statistics obtained from the PLoS Article Level Metrics dataset compiled by PLoS. The cumulative number of HTML views was found to follow a long tail distribution which is reasonably well-fitted by a lognormal one. We modeled the diffusion of information by a random multiplicative process, and thus extracted the rates of information spread at different stages after the publication of a paper. We found that the spread of information displays two distinct decay regimes: a rapid downfall in the first month after publication, and a gradual power law decay afterwards. We identified these two regimes with two distinct driving processes: a short-term behavior driven by the fame of a paper, and a long-term behavior consistent with citation statistics. The patterns of information spread were found to be remarkably similar in data from different journals, but there are intrinsic differences for different types of web usage (HTML views and PDF downloads versus XML). These similarities and differences shed light on the theoretical understanding of different complex systems, as well as a better design of the corresponding web applications that is of high potential marketing impact.
}, author = {Yan, Koon-Kiu and Gerstein, Mark}, doi = {10.1371/journal.pone.0019917}, interhash = {5ff7675888626380767e22ad7f672279}, intrahash = {221dd554089fd1b1918b345fffbd74ce}, journal = {PLoS ONE}, month = {05}, number = 5, pages = {e19917}, publisher = {Public Library of Science}, title = {The Spread of Scientific Information: Insights from the Web Usage Statistics in PLoS Article-Level Metrics}, url = {http://dx.doi.org/10.1371%2Fjournal.pone.0019917}, volume = 6, year = 2011 } @inproceedings{kaur2014scholarometer, abstract = {Scholarometer (scholarometer.indiana.edu) is a social tool developed to facilitate citation analysis and help evaluate the impact of authors. The Scholarometer service allows scholars to compute various citation-based impact measures. In exchange, users provide disciplinary annotations of authors, which allow for the computation of discipline-specific statistics and discipline-neutral impact metrics. We present here two improvements of our system. First, we integrated a new universal impact metric hs that uses crowdsourced data to calculate the global rank of a scholar across disciplinary boundaries. Second, improvements made in ambiguous name classification have increased the accuracy from 80% to 87%.}, acmid = {2615669}, address = {New York, NY, USA}, author = {Kaur, Jasleen and JafariAsbagh, Mohsen and Radicchi, Filippo and Menczer, Filippo}, booktitle = {Proceedings of the 2014 ACM Conference on Web Science}, doi = {10.1145/2615569.2615669}, interhash = {bfb4274f2a002cde9efbe71faf295e6a}, intrahash = {4edc2b8ed7acdd1ef8be4d6eefea8718}, isbn = {978-1-4503-2622-3}, location = {Bloomington, Indiana, USA}, numpages = {2}, pages = {285--286}, publisher = {ACM}, series = {WebSci '14}, title = {Scholarometer: A System for Crowdsourcing Scholarly Impact Metrics}, url = {http://doi.acm.org/10.1145/2615569.2615669}, year = 2014 } @electronic{priem2011altmetrics, author = {Priem, Jason and Taraborelli, Dario and Groth, Paul and Neylon, Cameron}, interhash = {3b42890ba6716182600b39a4b8f1b191}, intrahash = {e9765b890fcf3142edcf0bb77760f0c2}, title = {Altmetrics: a Manifesto}, url = {http://altmetrics.org/manifesto/}, year = 2011 } @article{brody2006earlier, abstract = {The use of citation counts to assess the impact of research articles is well established. However, the citation impact of an article can only be measured several years after it has been published. As research articles are increasingly accessed through the Web, the number of times an article is downloaded can be instantly recorded and counted. One would expect the number of times an article is read to be related both to the number of times it is cited and to how old the article is. The authors analyze how short-term Web usage impact predicts medium-term citation impact. 
The physics e-print archive—arXiv.org—is used to test this.}, author = {Brody, Tim and Harnad, Stevan and Carr, Leslie}, doi = {10.1002/asi.20373}, interhash = {b4ae997250ae110bcc89826cb2a8205c}, intrahash = {643ec09ec9d1fd641c0416c3d8dde8f6}, issn = {1532-2890}, journal = {Journal of the American Society for Information Science and Technology}, number = 8, pages = {1060--1072}, publisher = {Wiley Subscription Services, Inc., A Wiley Company}, title = {Earlier Web usage statistics as predictors of later citation impact}, url = {http://dx.doi.org/10.1002/asi.20373}, volume = 57, year = 2006 } @article{li2012validating, abstract = {This paper investigates whether CiteULike and Mendeley are useful for measuring scholarly influence, using a sample of 1,613 papers published in Nature and Science in 2007. Traditional citation counts from the Web of Science (WoS) were used as benchmarks to compare with the number of users who bookmarked the articles in one of the two free online reference manager sites. Statistically significant correlations were found between the user counts and the corresponding WoS citation counts, suggesting that this type of influence is related in some way to traditional citation-based scholarly impact but the number of users of these systems seems to be still too small for them to challenge traditional citation indexes.}, author = {Li, Xuemei and Thelwall, Mike and Giustini, Dean}, doi = {10.1007/s11192-011-0580-x}, interhash = {9f186a30dbe5af5dec8a49604bcca3dd}, intrahash = {60c73c95336adf02c315c7b4c434cfd4}, issn = {0138-9130}, journal = {Scientometrics}, language = {English}, number = 2, pages = {461-471}, publisher = {Springer Netherlands}, title = {Validating online reference managers for scholarly impact measurement}, url = {http://dx.doi.org/10.1007/s11192-011-0580-x}, volume = 91, year = 2012 } @article{khabsa2014number, abstract = {The number of scholarly documents available on the web is estimated using capture/recapture methods by studying the coverage of two major academic search engines: Google Scholar and Microsoft Academic Search. Our estimates show that at least 114 million English-language scholarly documents are accessible on the web, of which Google Scholar has nearly 100 million. Of these, we estimate that at least 27 million (24%) are freely available since they do not require a subscription or payment of any kind. In addition, at a finer scale, we also estimate the number of scholarly documents on the web for fifteen fields: Agricultural Science, Arts and Humanities, Biology, Chemistry, Computer Science, Economics and Business, Engineering, Environmental Sciences, Geosciences, Material Science, Mathematics, Medicine, Physics, Social Sciences, and Multidisciplinary, as defined by Microsoft Academic Search. In addition, we show that among these fields the percentage of documents defined as freely available varies significantly, i.e., from 12 to 50%.}, author = {Khabsa, Madian and Giles, C. 
Lee}, interhash = {61aed8da8eb53c7583d1f27e3cd8fa0c}, intrahash = {8fa9f00fb097a3fd6d0390152c848a37}, journal = {PLoS ONE}, month = may, number = 5, pages = {e93949}, title = {The number of scholarly documents on the public web}, url = {http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0093949}, volume = 9, year = 2014 } @electronic{noorden2014decline, author = {Noorden, Richard Van}, day = {requested 2014-03-13}, interhash = {d1ed9f5ae00884d268fb8d61cc9be940}, intrahash = {59301e0fdf4c292a426bf5c5399fc655}, title = {The decline and fall of Microsoft Academic Search}, url = {http://blogs.nature.com/news/2014/05/the-decline-and-fall-of-microsoft-academic-search.html}, year = 2014 } @inproceedings{saeed2008citation, abstract = {New developments in the collaborative and participatory role of Web has emerged new web based fast lane information systems like tagging and bookmarking applications. Same authors have shown elsewhere, that for same papers tags and bookmarks appear and gain volume very quickly in time as compared to citations and also hold good correlation with the citations. Studying the rank prediction models based on these systems gives advantage of gaining quick insight and localizing the highly productive and diffusible knowledge very early in time. This shows that it may be interesting to model the citation rank of a paper within the scope of a conference or journal issue, based on the bookmark counts (i-e count representing how many researchers have shown interest in a publication.) We used linear regression model for predicting citation ranks and compared both predicted citation rank models of bookmark counts and coauthor network counts for the papers of WWW06 conference. The results show that the rank prediction model based on bookmark counts is far better than the one based on coauthor network with mean absolute error for the first limited to the range of 5 and mean absolute error for second model above 18. Along with this we also compared the two bookmark prediction models out of which one was based on total citations rank as a dependent variable and the other was based on the adjusted citation rank. The citation rank was adjusted after subtracting the self and coauthor citations from total citations. The comparison reveals a significant improvement in the model and correlation after adjusting the citation rank. This may be interpreted that the bookmarking mechanisms represents the phenomenon similar to global discovery of a publication. While in the coauthor nets the papers are communicated personally and this communication or selection may not be captured within the bookmarking systems.}, author = {Saeed, A.U. and Afzal, M.T. and Latif, A. and Tochtermann, K.}, booktitle = {Multitopic Conference, 2008. INMIC 2008. IEEE International}, doi = {10.1109/INMIC.2008.4777769}, interhash = {26d1785cab132d577e377bb5bf299002}, intrahash = {677fc89fef6c79a6a4f25cb25246e38a}, month = dec, pages = {392-397}, title = {Citation rank prediction based on bookmark counts: Exploratory case study of WWW06 papers}, url = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=4777769}, year = 2008 } @article{ioannidis2014published, abstract = {
In a 2005 paper that has been accessed more than a million times, John Ioannidis explained why most published research findings were false. Here he revisits the topic, this time to address how to improve matters.
}, author = {Ioannidis, John P. A.}, doi = {10.1371/journal.pmed.1001747}, interhash = {8f87798566749594f170a42763ad239e}, intrahash = {2ced982df534cdc04b9feff0f4206b2a}, journal = {PLoS Med}, month = {10}, number = 10, pages = {e1001747}, publisher = {Public Library of Science}, title = {How to Make More Published Research True}, url = {http://dx.doi.org/10.1371%2Fjournal.pmed.1001747}, volume = 11, year = 2014 } @article{Larsen:2010:Scientometrics:20700371, abstract = {The growth rate of scientific publication has been studied from 1907 to 2007 using available data from a number of literature databases, including Science Citation Index (SCI) and Social Sciences Citation Index (SSCI). Traditional scientific publishing, that is publication in peer-reviewed journals, is still increasing although there are big differences between fields. There are no indications that the growth rate has decreased in the last 50 years. At the same time publication using new channels, for example conference proceedings, open archives and home pages, is growing fast. The growth rate for SCI up to 2007 is smaller than for comparable databases. This means that SCI was covering a decreasing part of the traditional scientific literature. There are also clear indications that the coverage by SCI is especially low in some of the scientific areas with the highest growth rate, including computer science and engineering sciences. The role of conference proceedings, open access archives and publications published on the net is increasing, especially in scientific fields with high growth rates, but this has only partially been reflected in the databases. The new publication channels challenge the use of the big databases in measurements of scientific productivity or output and of the growth rate of science. Because of the declining coverage and this challenge it is problematic that SCI has been used and is used as the dominant source for science indicators based on publication and citation numbers. The limited data available for social sciences show that the growth rate in SSCI was remarkably low and indicate that the coverage by SSCI was declining over time. National Science Indicators from Thomson Reuters is based solely on SCI, SSCI and Arts and Humanities Citation Index (AHCI). Therefore the declining coverage of the citation databases problematizes the use of this source.}, author = {Larsen, P O and von Ins, M}, doi = {10.1007/s11192-010-0202-z}, interhash = {cfb4b308f2ca153eaa7540b7d64b3577}, intrahash = {abdc38dfe051e5b29c8742ab3b950b9c}, journal = {Scientometrics}, month = sep, number = 3, pages = {575-603}, pmid = {20700371}, title = {The rate of growth in scientific publication and the decline in coverage provided by Science Citation Index}, url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2909426/}, volume = 84, year = 2010 } @misc{shuai2012scientific, abstract = {We analyze the online response of the scientific community to the preprint publication of scholarly articles. We employ a cohort of 4,606 scientific articles submitted to the preprint database arXiv.org between October 2010 and April 2011. We study three forms of reactions to these preprints: how they are downloaded on the arXiv.org site, how they are mentioned on the social media site Twitter, and how they are cited in the scholarly record. We perform two analyses. 
First, we analyze the delay and time span of article downloads and Twitter mentions following submission, to understand the temporal configuration of these reactions and whether significant differences exist between them. Second, we run correlation tests to investigate the relationship between Twitter mentions and both article downloads and article citations. We find that Twitter mentions follow rapidly after article submission and that they are correlated with later article downloads and later article citations, indicating that social media may be an important factor in determining the scientific impact of an article.}, author = {Shuai, Xin and Pepe, Alberto and Bollen, Johan}, interhash = {8331e7736f3cc8296cafd7e6397dc010}, intrahash = {6619e035ee8e7e72ccc9aa32f2acea8e}, note = {cite arxiv:1202.2461}, title = {How the Scientific Community Reacts to Newly Submitted Preprints: Article Downloads, Twitter Mentions, and Citations}, url = {http://arxiv.org/abs/1202.2461}, year = 2012 } @article{thijs2006influence, abstract = {In earlier studies by the authors, basic regularities of author self-citations have been analysed. These regularities are related to the ageing, to the relation between self-citations and foreign citations, to the interdependence of self-citations with other bibliometric indicators and to the influence of co-authorship on self-citation behaviour. Although both national and subject specific peculiarities influence the share of self-citations at the macro level, the authors came to the conclusion that - at this level of aggregation - there is practically no need for excluding self-citations. The aim of the present study is to answer the question in how far the influence of author self-citations on bibliometric meso-indicators deviates from that at the macro level, and to what extent national reference standards can be used in bibliometric meso analyses. In order to study the situation at the institutional level, a selection of twelve European universities representing different countries and different research profiles have been made. The results show a quite complex situation at the meso-level, therefore we suggest the usage of both indicators, including and excluding self-citations.}, affiliation = {Katholieke Universiteit Leuven, Steunpunt O&O Statistieken Leuven (Belgium) Leuven (Belgium)}, author = {Thijs, Bart and Glänzel, Wolfgang}, interhash = {82ea078d91ba87557fb69d7fba5171bc}, intrahash = {c360454b0f49b781ccbbe16840f54b35}, issn = {0138-9130}, journal = {Scientometrics}, keyword = {Informatik}, note = {10.1007/s11192-006-0006-3}, number = 1, pages = {71-80}, publisher = {Akadémiai Kiadó, co-published with Springer Science+Business Media B.V., Formerly Kluwer Academic Publishers B.V.}, title = {The influence of author self-citations on bibliometric meso-indicators. The case of european universities}, url = {http://dx.doi.org/10.1007/s11192-006-0006-3}, volume = 66, year = 2006 } @article{haustein2011applying, abstract = {Web 2.0 technologies are finding their way into academics: specialized social bookmarking services allow researchers to store and share scientific literature online. By bookmarking and tagging articles, academic prosumers generate new information about resources, i.e. usage statistics and content description of scientific journals. Given the lack of global download statistics, the authors propose the application of social bookmarking data to journal evaluation. 
For a set of 45 physics journals all 13,608 bookmarks from CiteULike, Connotea and BibSonomy to documents published between 2004 and 2008 were analyzed. This article explores bookmarking data in STM and examines in how far it can be used to describe the perception of periodicals by the readership. Four basic indicators are defined, which analyze different aspects of usage: Usage Ratio, Usage Diffusion, Article Usage Intensity and Journal Usage Intensity. Tags are analyzed to describe a reader-specific view on journal content.}, author = {Haustein, Stefanie and Siebenlist, Tobias}, doi = {10.1016/j.joi.2011.04.002}, interhash = {13fe59aae3d6ef95b529ffe00ede4126}, intrahash = {60170943fb293bcb54754710ec9dced1}, issn = {1751-1577}, journal = {Journal of Informetrics}, number = 3, pages = {446 - 457}, title = {Applying social bookmarking data to evaluate journal usage}, url = {http://www.sciencedirect.com/science/article/pii/S1751157711000393}, volume = 5, year = 2011 } @article{fu2008models, abstract = {The single most important bibliometric criterion for judging the impact of biomedical papers and their authors work is the number of citations received which is commonly referred to as citation count. This metric however is unavailable until several years after publication time. In the present work, we build computer models that accurately predict citation counts of biomedical publications within a deep horizon of ten years using only predictive information available at publication time. Our experiments show that it is indeed feasible to accurately predict future citation counts with a mixture of content-based and bibliometric features using machine learning methods. The models pave the way for practical prediction of the long-term impact of publication, and their statistical analysis provides greater insight into citation behavior.}, author = {Fu, Lawrence D. and Aliferis, Constantin}, interhash = {1eb972fa9ba9e255d6889b01532ea767}, intrahash = {39d155a532108bc71437451e31287943}, journal = {AMIA Annu Symp Proc}, pages = {222-226}, pmid = {18999029}, title = {Models for predicting and explaining citation count of biomedical articles}, url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2656101/}, year = 2008 } @article{lokker2008prediction, author = {Lokker, Cynthia and McKibbon, K Ann and McKinlay, R James and Wilczynski, Nancy L and Haynes, R Brian}, doi = {10.1136/bmj.39482.526713.BE}, interhash = {f5f066ee09051d862c1a1c9f34a832c0}, intrahash = {dece3577294846d48f198a6a5e6425c2}, journal = {BMJ}, month = {3}, number = 7645, pages = {655--657}, title = {Prediction of citation counts for clinical articles at two years using data available within three weeks of publication: retrospective cohort study}, volume = 336, year = 2008 } @article{hirsch2007index, abstract = {Bibliometric measures of individual scientific achievement are of particular interest if they can be used to predict future achievement. Here we report results of an empirical study of the predictive power of the h index compared with other indicators. Our findings indicate that the h index is better than other indicators considered (total citation count, citations per paper, and total paper count) in predicting future scientific achievement. We discuss reasons for the superiority of the h index.}, author = {Hirsch, J. 
E.}, doi = {10.1073/pnas.0707962104}, eprint = {http://www.pnas.org/content/104/49/19193.full.pdf+html}, interhash = {9bc6518ef60bb256ca78287a6c349f05}, intrahash = {43caaad4f117fc3f5c14d83b9082448e}, journal = {Proceedings of the National Academy of Sciences}, number = 49, pages = {19193-19198}, title = {Does the h index have predictive power?}, url = {http://www.pnas.org/content/104/49/19193.abstract}, volume = 104, year = 2007 } @inproceedings{yan2011citation, abstract = {In most of the cases, scientists depend on previous literature which is relevant to their research fields for developing new ideas. However, it is not wise, nor possible, to track all existed publications because the volume of literature collection grows extremely fast. Therefore, researchers generally follow, or cite merely a small proportion of publications which they are interested in. For such a large collection, it is rather interesting to forecast which kind of literature is more likely to attract scientists' response. In this paper, we use the citations as a measurement for the popularity among researchers and study the interesting problem of Citation Count Prediction (CCP) to examine the characteristics for popularity. Estimation of possible popularity is of great significance and is quite challenging. We have utilized several features of fundamental characteristics for those papers that are highly cited and have predicted the popularity degree of each literature in the future. We have implemented a system which takes a series of features of a particular publication as input and produces as output the estimated citation counts of that article after a given time period. We consider several regression models to formulate the learning process and evaluate their performance based on the coefficient of determination (R-square). Experimental results on a real-large data set show that the best predictive model achieves a mean average predictive performance of 0.740 measured in R-square, which significantly outperforms several alternative algorithms.}, acmid = {2063757}, address = {New York, NY, USA}, author = {Yan, Rui and Tang, Jie and Liu, Xiaobing and Shan, Dongdong and Li, Xiaoming}, booktitle = {Proceedings of the 20th ACM international conference on Information and knowledge management}, doi = {10.1145/2063576.2063757}, interhash = {71ec0933a36df3dd21f38285bdf9b1b0}, intrahash = {b0caabb6e17d9b790d3f13c897330aad}, isbn = {978-1-4503-0717-8}, location = {Glasgow, Scotland, UK}, numpages = {6}, pages = {1247--1252}, publisher = {ACM}, series = {CIKM '11}, title = {Citation count prediction: learning to estimate future citations for literature}, url = {http://doi.acm.org/10.1145/2063576.2063757}, year = 2011 } @article{small1973cocitation, abstract = {A new form of document coupling called co-citation is defined as the frequency with which two documents are cited together. The co-citation frequency of two scientific papers can be determined by comparing lists of citing documents in the Science Citation Index and counting identical entries. Networks of co-cited papers can be generated for specific scientific specialties, and an example is drawn from the literature of particle physics. Co-citation patterns are found to differ significantly from bibliographic coupling patterns, but to agree generally with patterns of direct citation. Clusters of co-cited papers provide a new way to study the specialty structure of science. 
They may provide a new approach to indexing and to the creation of SDI profiles.}, author = {Small, Henry}, doi = {10.1002/asi.4630240406}, interhash = {dfbb7636c96853cc258878548c12d12f}, intrahash = {1dc18dfe50667ff19d5cfa9d52d3e37b}, issn = {1097-4571}, journal = {Journal of the American Society for Information Science}, number = 4, pages = {265--269}, publisher = {Wiley Subscription Services, Inc., A Wiley Company}, title = {Co-citation in the scientific literature: A new measure of the relationship between two documents}, url = {http://dx.doi.org/10.1002/asi.4630240406}, volume = 24, year = 1973 } @article{an2004characterizing, acmid = {1031388}, address = {New York, NY, USA}, author = {An, Yuan and Janssen, Jeannette and Milios, Evangelos E.}, doi = {http://dx.doi.org/10.1007/s10115-003-0128-3}, interhash = {73fdd0592c1641d05da5d2323d9f59ae}, intrahash = {60e0c625f5765a05c588c6765a8cd93c}, issn = {0219-1377}, issue = {6}, journal = {Knowl. Inf. Syst.}, month = {November}, numpages = {15}, pages = {664--678}, publisher = {Springer-Verlag New York, Inc.}, title = {Characterizing and Mining the Citation Graph of the Computer Science Literature}, url = {http://dx.doi.org/10.1007/s10115-003-0128-3}, volume = 6, year = 2004 } @misc{Ren2011, abstract = { It has been known for a long time that citation networks are always highly clustered, such as the existences of abundant triangles and high clustering coefficient. In a growth model, one typical way to produce clustering is using the trid formation mechanism. However, we find that this mechanism fails to generate enough triangles in a real-world citation network. By analyzing the network, it is found that one paper always cites papers that are already highly connected. We point out that the highly connected papers may refer to similar research topic and one subsequent paper tends to cite all of them. Based on this assumption, we propose a growth model for citation networks in which a new paper i firstly attaches to one relevant paper j and then with a probability links those papers in the same clique to which j belongs. We compare our model to two real-world citation networks - one on a special research area and the other on multidisciplinary sciences. Results show that for the two networks the in-degree distributions are matched and the clustering features, i.e., the number of triangles and the average clustering coefficient, are well reproduced. }, author = {Ren, Fu-Xin and Cheng, Xue-Qi and Shen, Hua-Wei}, interhash = {2aab1505ce7da27402449873fb57b48e}, intrahash = {d668e639ed78f4c7ec53eeba64d8ae2a}, note = {cite arxiv:1104.4209}, title = {Modeling the clustering in citation networks}, url = {http://arxiv.org/abs/1104.4209}, year = 2011 } @article{springerlink:10.1023/A:1021919228368, abstract = {This study investigates the role of self-citation in the scientific production of Norway (1981-1996). More than 45,000 publications have been analysed. Using a three-year citation window we find that 36% of all citations represent author self-citations. However, this percentage is decreasing when citations are traced for longer periods. We find the highest share of self-citation among the least cited papers. There is a strong positive correlation between the number of self-citations and the number of authors of the publications. Still, only a minor part of the overall increase in citation rates that can be found for multi-authored papers is due to self-citations. 
Also, the share of self-citation shows significant variations among different scientific disciplines. The results are relevant for the discussion concerning use of citation indicators in research assessments.}, affiliation = {Norwegian Institute for Studies in Research and Higher Education (NIFU), Hegdehaugsveien 31, NO-0352 Oslo, Norway}, author = {Aksnes, Dag}, interhash = {d929a33337bbbcc3b3bc1ebc2d1fe3d3}, intrahash = {3d68c07447061228985c2e26fb2c391e}, issn = {0138-9130}, issue = {2}, journal = {Scientometrics}, keyword = {Computer Science}, note = {10.1023/A:1021919228368}, pages = {235-246}, publisher = {Akadémiai Kiadó, co-published with Springer Science+Business Media B.V., Formerly Kluwer Academic Publishers B.V.}, title = {A macro study of self-citation}, url = {http://dx.doi.org/10.1023/A:1021919228368}, volume = 56, year = 2003 } @article{egghe2008mathematical, abstract = {This article studies the h-index (Hirsch index) and the g-index of authors, in case one counts authorship of the cited articles in a fractional way. There are two ways to do this: One counts the citations to these papers in a fractional way or one counts the ranks of the papers in a fractional way as credit for an author. In both cases, we define the fractional h- and g-indexes, and we present inequalities (both upper and lower bounds) between these fractional h- and g-indexes and their corresponding unweighted values (also involving, of course, the coauthorship distribution). Wherever applicable, examples and counterexamples are provided. In a concrete example (the publication citation list of the present author), we make explicit calculations of these fractional h- and g-indexes and show that they are not very different from the unweighted ones. © 2008 Wiley Periodicals, Inc.}, acmid = {1398026}, address = {New York, NY, USA}, author = {Egghe, Leo}, doi = {10.1002/asi.v59:10}, interhash = {545ee9605bc386f0246769a38ee5e847}, intrahash = {911412afc8ca68fcf60d6256f1c3c9df}, issn = {1532-2882}, issue = {10}, journal = {J. Am. Soc. Inf. Sci. Technol.}, month = {August}, numpages = {9}, pages = {1608--1616}, publisher = {John Wiley \& Sons, Inc.}, title = {Mathematical theory of the h- and g-index in case of fractional counting of authorship}, url = {http://portal.acm.org/citation.cfm?id=1398018.1398026}, volume = 59, year = 2008 } @misc{Maslov2009, abstract = { We review our recent work on applying the Google PageRank algorithm to find scientific "gems" among all Physical Review publications, and its extension to CiteRank, to find currently popular research directions. These metrics provide a meaningful extension to traditionally-used importance measures, such as the number of citations and journal impact factor. We also point out some pitfalls of over-relying on quantitative metrics to evaluate scientific quality. }, author = {Maslov, Sergei and Redner, S.}, interhash = {8f0a3a222a5c357e4db423ec065065da}, intrahash = {d2b34ecaa23078ebef7a7ee84be509a4}, note = {cite arxiv:0901.2640 Comment: 3 pages, 1 figure, invited comment for the Journal of Neuroscience. The arxiv version is microscopically different from the published version}, title = {Promise and Pitfalls of Extending Google's PageRank Algorithm to Citation Networks}, url = {http://arxiv.org/abs/0901.2640}, year = 2009 }