% Bibliography entries on scholarly-publication volume, reliability and
% bibliometric indicators. One field per line; months use the standard
% three-letter macros; page ranges use a double hyphen; LaTeX specials in
% text fields are escaped. The interhash/intrahash/acmid/numpages/pmid
% fields are not standard BibTeX fields and are kept as-is from the export.

% Khabsa and Giles (2014): estimate of the number of scholarly documents on the web.
% The doi value is the one embedded in the percent-encoded url of this entry.
@article{khabsa2014number,
  author    = {Khabsa, Madian and Giles, C. Lee},
  title     = {The number of scholarly documents on the public web},
  journal   = {PLoS ONE},
  volume    = {9},
  number    = {5},
  pages     = {e93949},
  month     = may,
  year      = {2014},
  doi       = {10.1371/journal.pone.0093949},
  url       = {http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0093949},
  interhash = {61aed8da8eb53c7583d1f27e3cd8fa0c},
  intrahash = {8fa9f00fb097a3fd6d0390152c848a37},
  abstract  = {The number of scholarly documents available on the web is estimated using capture/recapture methods by studying the coverage of two major academic search engines: Google Scholar and Microsoft Academic Search. Our estimates show that at least 114 million English-language scholarly documents are accessible on the web, of which Google Scholar has nearly 100 million. Of these, we estimate that at least 27 million (24\%) are freely available since they do not require a subscription or payment of any kind. In addition, at a finer scale, we also estimate the number of scholarly documents on the web for fifteen fields: Agricultural Science, Arts and Humanities, Biology, Chemistry, Computer Science, Economics and Business, Engineering, Environmental Sciences, Geosciences, Material Science, Mathematics, Medicine, Physics, Social Sciences, and Multidisciplinary, as defined by Microsoft Academic Search. In addition, we show that among these fields the percentage of documents defined as freely available varies significantly, i.e., from 12 to 50\%.},
}

% Ioannidis (2014): follow-up essay on improving the reliability of published research.
% The abstract holds only the summary sentence; the publisher-page boilerplate
% pointing to the Editors' Summary and the surrounding blank lines are not part of it.
@article{ioannidis2014published,
  author    = {Ioannidis, John P. A.},
  title     = {How to Make More Published Research True},
  journal   = {PLoS Medicine},
  volume    = {11},
  number    = {10},
  pages     = {e1001747},
  month     = oct,
  year      = {2014},
  publisher = {Public Library of Science},
  doi       = {10.1371/journal.pmed.1001747},
  url       = {http://dx.doi.org/10.1371/journal.pmed.1001747},
  interhash = {8f87798566749594f170a42763ad239e},
  intrahash = {2ced982df534cdc04b9feff0f4206b2a},
  abstract  = {In a 2005 paper that has been accessed more than a million times, John Ioannidis explained why most published research findings were false. Here he revisits the topic, this time to address how to improve matters.},
}

% Larsen and von Ins (2010): growth of scientific publication versus citation-index coverage.
% The index name in the title is brace-protected so sentence-casing styles keep its capitals.
@article{Larsen:2010:Scientometrics:20700371,
  author    = {Larsen, P. O. and von Ins, M.},
  title     = {The rate of growth in scientific publication and the decline in coverage provided by {Science Citation Index}},
  journal   = {Scientometrics},
  volume    = {84},
  number    = {3},
  pages     = {575--603},
  month     = sep,
  year      = {2010},
  doi       = {10.1007/s11192-010-0202-z},
  pmid      = {20700371},
  url       = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2909426/},
  interhash = {cfb4b308f2ca153eaa7540b7d64b3577},
  intrahash = {abdc38dfe051e5b29c8742ab3b950b9c},
  abstract  = {The growth rate of scientific publication has been studied from 1907 to 2007 using available data from a number of literature databases, including Science Citation Index (SCI) and Social Sciences Citation Index (SSCI). Traditional scientific publishing, that is publication in peer-reviewed journals, is still increasing although there are big differences between fields. There are no indications that the growth rate has decreased in the last 50 years. At the same time publication using new channels, for example conference proceedings, open archives and home pages, is growing fast. The growth rate for SCI up to 2007 is smaller than for comparable databases. This means that SCI was covering a decreasing part of the traditional scientific literature. There are also clear indications that the coverage by SCI is especially low in some of the scientific areas with the highest growth rate, including computer science and engineering sciences. The role of conference proceedings, open access archives and publications published on the net is increasing, especially in scientific fields with high growth rates, but this has only partially been reflected in the databases. The new publication channels challenge the use of the big databases in measurements of scientific productivity or output and of the growth rate of science. Because of the declining coverage and this challenge it is problematic that SCI has been used and is used as the dominant source for science indicators based on publication and citation numbers. The limited data available for social sciences show that the growth rate in SSCI was remarkably low and indicate that the coverage by SSCI was declining over time. National Science Indicators from Thomson Reuters is based solely on SCI, SSCI and Arts and Humanities Citation Index (AHCI). Therefore the declining coverage of the citation databases problematizes the use of this source.},
}

% Egghe (2008): fractional-counting variants of the h- and g-index.
% NOTE(review): the doi below (10.1002/asi.v59:10) looks like an issue-level
% identifier rather than an article-level DOI -- confirm against the publisher
% record before relying on it.
@article{egghe2008mathematical,
  author    = {Egghe, Leo},
  title     = {Mathematical theory of the h- and g-index in case of fractional counting of authorship},
  journal   = {Journal of the American Society for Information Science and Technology},
  volume    = {59},
  number    = {10},
  pages     = {1608--1616},
  numpages  = {9},
  month     = aug,
  year      = {2008},
  publisher = {John Wiley \& Sons, Inc.},
  address   = {New York, NY, USA},
  issn      = {1532-2882},
  doi       = {10.1002/asi.v59:10},
  acmid     = {1398026},
  url       = {http://portal.acm.org/citation.cfm?id=1398018.1398026},
  interhash = {545ee9605bc386f0246769a38ee5e847},
  intrahash = {911412afc8ca68fcf60d6256f1c3c9df},
  abstract  = {This article studies the h-index (Hirsch index) and the g-index of authors, in case one counts authorship of the cited articles in a fractional way. There are two ways to do this: One counts the citations to these papers in a fractional way or one counts the ranks of the papers in a fractional way as credit for an author. In both cases, we define the fractional h- and g-indexes, and we present inequalities (both upper and lower bounds) between these fractional h- and g-indexes and their corresponding unweighted values (also involving, of course, the coauthorship distribution). Wherever applicable, examples and counterexamples are provided. In a concrete example (the publication citation list of the present author), we make explicit calculations of these fractional h- and g-indexes and show that they are not very different from the unweighted ones. {\copyright} 2008 Wiley Periodicals, Inc.},
}