J |
Brzezinski, M.
(2015):
Power laws in citation distributions: evidence from Scopus.
In: Scientometrics,
Ausgabe/Number: 1,
Vol. 103,
Verlag/Publisher: Springer Netherlands.
Erscheinungsjahr/Year: 2015.
Seiten/Pages: 213-228.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
Modeling distributions of citations to scientific papers is crucial for understanding how science develops. However, there is a considerable empirical controversy on which statistical model fits the citation distributions best. This paper is concerned with rigorous empirical detection of power-law behaviour in the distribution of citations received by the most highly cited scientific papers. We have used a large, novel data set on citations to scientific papers published between 1998 and 2002 drawn from Scopus. The power-law model is compared with a number of alternative models using a likelihood ratio test. We have found that the power-law hypothesis is rejected for around half of the Scopus fields of science. For these fields of science, the Yule, power-law with exponential cut-off and log-normal distributions seem to fit the data better than the pure power-law model. On the other hand, when the power-law hypothesis is not rejected, it is usually empirically indistinguishable from most of the alternative models. The pure power-law model seems to be the best model only for the most highly cited papers in “Physics and Astronomy”. Overall, our results seem to support theories implying that the most highly cited scientific papers follow the Yule, power-law with exponential cut-off or log-normal distribution. Our findings suggest also that power laws in citation distributions, when present, account only for a very small fraction of the published papers (less than 1 % for most of science fields) and that the power-law scaling parameter (exponent) is substantially higher (from around 3.2 to around 4.7) than found in the older literature.
@article{brzezinski2015power,
  author    = {Brzezinski, Michal},
  title     = {Power laws in citation distributions: evidence from {Scopus}},
  journal   = {Scientometrics},
  publisher = {Springer Netherlands},
  year      = {2015},
  volume    = {103},
  number    = {1},
  pages     = {213--228},
  url       = {http://dx.doi.org/10.1007/s11192-014-1524-z},
  doi       = {10.1007/s11192-014-1524-z},
  issn      = {0138-9130},
  keywords  = {citation, distribution, fit, powerLaw},
  abstract  = {Modeling distributions of citations to scientific papers is crucial for understanding how science develops. However, there is a considerable empirical controversy on which statistical model fits the citation distributions best. This paper is concerned with rigorous empirical detection of power-law behaviour in the distribution of citations received by the most highly cited scientific papers. We have used a large, novel data set on citations to scientific papers published between 1998 and 2002 drawn from Scopus. The power-law model is compared with a number of alternative models using a likelihood ratio test. We have found that the power-law hypothesis is rejected for around half of the Scopus fields of science. For these fields of science, the Yule, power-law with exponential cut-off and log-normal distributions seem to fit the data better than the pure power-law model. On the other hand, when the power-law hypothesis is not rejected, it is usually empirically indistinguishable from most of the alternative models. The pure power-law model seems to be the best model only for the most highly cited papers in “Physics and Astronomy”. Overall, our results seem to support theories implying that the most highly cited scientific papers follow the Yule, power-law with exponential cut-off or log-normal distribution. Our findings suggest also that power laws in citation distributions, when present, account only for a very small fraction of the published papers (less than 1 \% for most of science fields) and that the power-law scaling parameter (exponent) is substantially higher (from around 3.2 to around 4.7) than found in the older literature.}
}
%0 = article
%A = Brzezinski, Michal
%D = 2015
%I = Springer Netherlands
%T = Power laws in citation distributions: evidence from Scopus
%U = http://dx.doi.org/10.1007/s11192-014-1524-z
|
Alstott, J.; Bullmore, E. & Plenz, D.
(2013):
Powerlaw: a Python package for analysis of heavy-tailed distributions.
[Volltext] [Kurzfassung] [BibTeX] [Endnote] Power laws are theoretically interesting probability distributions that are also frequently used to describe empirical data. In recent years effective statistical methods for fitting power laws have been developed, but appropriate use of these techniques requires significant programming and statistical insight. In order to greatly decrease the barriers to using good statistical methods for fitting power law distributions, we developed the powerlaw Python package. This software package provides easy commands for basic fitting and statistical analysis of distributions. Notably, it also seeks to support a variety of user needs by being exhaustive in the options available to the user. The source code is publicly available and easily extensible.
@misc{alstott2013powerlaw,
  author        = {Alstott, Jeff and Bullmore, Ed and Plenz, Dietmar},
  title         = {Powerlaw: a {Python} package for analysis of heavy-tailed distributions},
  year          = {2013},
  eprint        = {1305.0215},
  archiveprefix = {arXiv},
  note          = {18 pages, 6 figures, code and supporting information at https://github.com/jeffalstott/powerlaw and https://pypi.python.org/pypi/powerlaw},
  url           = {http://arxiv.org/abs/1305.0215},
  doi           = {10.1371/journal.pone.0085777},
  keywords      = {distribution, fit, powerlaw, python},
  abstract      = {Power laws are theoretically interesting probability distributions that are also frequently used to describe empirical data. In recent years effective statistical methods for fitting power laws have been developed, but appropriate use of these techniques requires significant programming and statistical insight. In order to greatly decrease the barriers to using good statistical methods for fitting power law distributions, we developed the powerlaw Python package. This software package provides easy commands for basic fitting and statistical analysis of distributions. Notably, it also seeks to support a variety of user needs by being exhaustive in the options available to the user. The source code is publicly available and easily extensible.}
}
%0 = misc
%A = Alstott, Jeff and Bullmore, Ed and Plenz, Dietmar
%B =
%C =
%D = 2013
%I =
%T = Powerlaw: a Python package for analysis of heavy-tailed distributions
%U = http://arxiv.org/abs/1305.0215
|
|
J |
Clauset, A.; Shalizi, C. R. & Newman, M. E. J.
(2009):
Power-Law Distributions in Empirical Data.
In: SIAM Review,
Ausgabe/Number: 4,
Vol. 51,
Erscheinungsjahr/Year: 2009.
Seiten/Pages: 661-703.
[Volltext] [BibTeX]
[Endnote]
@article{clauset2009powerlaw,
  author   = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.},
  title    = {Power-Law Distributions in Empirical Data},
  journal  = {SIAM Review},
  year     = {2009},
  volume   = {51},
  number   = {4},
  pages    = {661--703},
  url      = {http://dx.doi.org/10.1137/070710111},
  doi      = {10.1137/070710111},
  keywords = {clauset, empirical, fit, powerLaw}
}
%0 = article
%A = Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.
%D = 2009
%T = Power-Law Distributions in Empirical Data
%U = http://dx.doi.org/10.1137/070710111
|
J |
Clauset, A.; Shalizi, C. R. & Newman, M. E. J.
(2009):
Power-Law Distributions in Empirical Data.
In: SIAM Review,
Ausgabe/Number: 4,
Vol. 51,
Verlag/Publisher: SIAM.
Erscheinungsjahr/Year: 2009.
Seiten/Pages: 661-703.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
Power-law distributions occur in many situations of scientific interest and have significant consequences for our understanding of natural and man-made phenomena. Unfortunately, the detection and characterization of power laws is complicated by the large fluctuations that occur in the tail of the distribution—the part of the distribution representing large but rare events—and by the difficulty of identifying the range over which power-law behavior holds. Commonly used methods for analyzing power-law data, such as least-squares fitting, can produce substantially inaccurate estimates of parameters for power-law distributions, and even in cases where such methods return accurate answers they are still unsatisfactory because they give no indication of whether the data obey a power law at all. Here we present a principled statistical framework for discerning and quantifying power-law behavior in empirical data. Our approach combines maximum-likelihood fitting methods with goodness-of-fit tests based on the Kolmogorov–Smirnov (KS) statistic and likelihood ratios. We evaluate the effectiveness of the approach with tests on synthetic data and give critical comparisons to previous approaches. We also apply the proposed methods to twenty-four real-world data sets from a range of different disciplines, each of which has been conjectured to follow a power-law distribution. In some cases we find these conjectures to be consistent with the data, while in others the power law is ruled out.
@article{clauset2009powerlaw,
  author    = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.},
  title     = {Power-Law Distributions in Empirical Data},
  journal   = {SIAM Review},
  volume    = {51},
  number    = {4},
  pages     = {661--703},
  year      = {2009},
  publisher = {SIAM},
  issn      = {0036-1445},
  doi       = {10.1137/070710111},
  url       = {http://link.aip.org/link/?SIR/51/661/1},
  keywords  = {law, power, powerlaw},
  abstract  = {Power-law distributions occur in many situations of scientific interest and have significant consequences for our understanding of natural and man-made phenomena. Unfortunately, the detection and characterization of power laws is complicated by the large fluctuations that occur in the tail of the distribution—the part of the distribution representing large but rare events—and by the difficulty of identifying the range over which power-law behavior holds. Commonly used methods for analyzing power-law data, such as least-squares fitting, can produce substantially inaccurate estimates of parameters for power-law distributions, and even in cases where such methods return accurate answers they are still unsatisfactory because they give no indication of whether the data obey a power law at all. Here we present a principled statistical framework for discerning and quantifying power-law behavior in empirical data. Our approach combines maximum-likelihood fitting methods with goodness-of-fit tests based on the Kolmogorov–Smirnov (KS) statistic and likelihood ratios. We evaluate the effectiveness of the approach with tests on synthetic data and give critical comparisons to previous approaches. We also apply the proposed methods to twenty-four real-world data sets from a range of different disciplines, each of which has been conjectured to follow a power-law distribution. In some cases we find these conjectures to be consistent with the data, while in others the power law is ruled out.},
}
%0 = article
%A = Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.
%D = 2009
%I = SIAM
%T = Power-Law Distributions in Empirical Data
%U = http://link.aip.org/link/?SIR/51/661/1
|
Clauset, A.; Shalizi, C. R. & Newman, M. E. J.
(2007):
Power-law distributions in empirical data.
[Volltext] [Kurzfassung] [BibTeX] [Endnote] Power-law distributions occur in many situations of scientific interest and have significant consequences for our understanding of natural and man-made phenomena. Unfortunately, the detection and characterization of power laws is complicated by the large fluctuations that occur in the tail of the distribution - the part of the distribution representing large but rare events - and by the difficulty of identifying the range over which power-law behavior holds. Commonly used methods for analyzing power-law data, such as least-squares fitting, can produce substantially inaccurate estimates of parameters for power-law distributions, and even in cases where such methods return accurate answers they are still unsatisfactory because they give no indication of whether the data obey a power law at all. Here we present a principled statistical framework for discerning and quantifying power-law behavior in empirical data. Our approach combines maximum-likelihood fitting methods with goodness-of-fit tests based on the Kolmogorov-Smirnov statistic and likelihood ratios. We evaluate the effectiveness of the approach with tests on synthetic data and give critical comparisons to previous approaches. We also apply the proposed methods to twenty-four real-world data sets from a range of different disciplines, each of which has been conjectured to follow a power-law distribution. In some cases we find these conjectures to be consistent with the data while in others the power law is ruled out.
@misc{clauset2007powerlaw,
  author        = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.},
  title         = {Power-law distributions in empirical data},
  year          = {2007},
  eprint        = {0706.1062},
  archiveprefix = {arXiv},
  note          = {43 pages, 11 figures, 7 tables, 4 appendices; code available at http://www.santafe.edu/~aaronc/powerlaws/},
  url           = {http://arxiv.org/abs/0706.1062},
  doi           = {10.1137/070710111},
  keywords      = {data, distribution, distributions, empirical, law, power, powerlaw},
  abstract      = {Power-law distributions occur in many situations of scientific interest and have significant consequences for our understanding of natural and man-made phenomena. Unfortunately, the detection and characterization of power laws is complicated by the large fluctuations that occur in the tail of the distribution -- the part of the distribution representing large but rare events -- and by the difficulty of identifying the range over which power-law behavior holds. Commonly used methods for analyzing power-law data, such as least-squares fitting, can produce substantially inaccurate estimates of parameters for power-law distributions, and even in cases where such methods return accurate answers they are still unsatisfactory because they give no indication of whether the data obey a power law at all. Here we present a principled statistical framework for discerning and quantifying power-law behavior in empirical data. Our approach combines maximum-likelihood fitting methods with goodness-of-fit tests based on the Kolmogorov-Smirnov statistic and likelihood ratios. We evaluate the effectiveness of the approach with tests on synthetic data and give critical comparisons to previous approaches. We also apply the proposed methods to twenty-four real-world data sets from a range of different disciplines, each of which has been conjectured to follow a power-law distribution. In some cases we find these conjectures to be consistent with the data while in others the power law is ruled out.}
}
%0 = misc
%A = Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.
%B =
%C =
%D = 2007
%I =
%T = Power-law distributions in empirical data
%U = http://arxiv.org/abs/0706.1062
|
|
J |
Goldstein, M. L.; Morris, S. A. & Yen, G. G.
(2004):
Fitting to the power-law distribution.
In: The European Physical Journal B - Condensed Matter and Complex Systems,
Ausgabe/Number: 2,
Vol. 41,
Erscheinungsjahr/Year: 2004.
Seiten/Pages: 255-258.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
Version 1 of Goldstein 04 power law fit containing also the chi 2 test
@article{Goldstein04powerlawfitV1,
  author   = {Goldstein, M. L. and Morris, S. A. and Yen, G. G.},
  title    = {Fitting to the power-law distribution},
  journal  = {The European Physical Journal B - Condensed Matter and Complex Systems},
  year     = {2004},
  volume   = {41},
  number   = {2},
  pages    = {255--258},
  url      = {http://arxiv.org/abs/cond-mat/0402322v1},
  keywords = {distribution, fitting, law, power, powerlaw},
  abstract = {Version 1 of Goldstein 04 power law fit containing also the chi 2 test}
}
%0 = article
%A = Goldstein, M. L. and Morris, S. A. and Yen, G. G.
%D = 2004
%T = Fitting to the power-law distribution
%U = http://arxiv.org/abs/cond-mat/0402322v1
|
J |
Mitzenmacher, M.
(2004):
A Brief History of Generative Models for Power Law and Lognormal Distributions
. In: Internet Mathematics, Ausgabe/Number: 2, Vol. 1, Erscheinungsjahr/Year: 2004. Seiten/Pages: 226-251. [Volltext] [Kurzfassung] [BibTeX] [Endnote] Recently, I became interested in a current debate over whether file size distributions are best modelled by a power law distribution or a lognormal distribution. In trying to learn enough about these distributions to settle the question, I found a rich and long history, spanning many fields. Indeed, several recently proposed models from the computer science community have antecedents in work from decades ago. Here, I briefly survey some of this history, focusing on underlying generative models that lead to these distributions. One finding is that lognormal and power law distributions connect quite naturally, and hence, it is not surprising that lognormal distributions have arisen as a possible alternative to power law distributions across many fields.
@article{mitzenmacher2004history,
  author   = {Mitzenmacher, M.},
  title    = {A Brief History of Generative Models for Power Law and Lognormal Distributions},
  journal  = {Internet Mathematics},
  year     = {2004},
  volume   = {1},
  number   = {2},
  pages    = {226--251},
  url      = {http://www.eecs.harvard.edu/~michaelm/CS223/powerlaw.pdf},
  keywords = {generative, law, model, power, powerlaw},
  abstract = {Recently, I became interested in a current debate over whether file size distributions are best modelled by a power law distribution or a lognormal distribution. In trying to learn enough about these distributions to settle the question, I found a rich and long history, spanning many fields. Indeed, several recently proposed models from the computer science community have antecedents in work from decades ago. Here, I briefly survey some of this history, focusing on underlying generative models that lead to these distributions. One finding is that lognormal and power law distributions connect quite naturally, and hence, it is not surprising that lognormal distributions have arisen as a possible alternative to power law distributions across many fields.}
}
%0 = article
%A = Mitzenmacher, M.
%D = 2004
%T = A Brief History of Generative Models for Power Law and Lognormal Distributions
%U = http://www.eecs.harvard.edu/~michaelm/CS223/powerlaw.pdf |
Newman, M. E. J.
(2004):
Power laws, Pareto distributions and Zipf's law.
[Volltext] [Kurzfassung] [BibTeX] [Endnote] When the probability of measuring a particular value of some quantity varies inversely as a power of that value, the quantity is said to follow a power law, also known variously as Zipf's law or the Pareto distribution. Power laws appear widely in physics, biology, earth and planetary sciences, economics and finance, computer science, demography and the social sciences. For instance, the distributions of the sizes of cities, earthquakes, solar flares, moon craters, wars and people's personal fortunes all appear to follow power laws. The origin of power-law behaviour has been a topic of debate in the scientific community for more than a century. Here we review some of the empirical evidence for the existence of power-law forms and the theories proposed to explain them.
@misc{newman2004power,
  author        = {Newman, M. E. J.},
  title         = {Power laws, {Pareto} distributions and {Zipf's} law},
  year          = {2004},
  eprint        = {cond-mat/0412004},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/cond-mat/0412004},
  keywords      = {law, power, powerlaw},
  abstract      = {When the probability of measuring a particular value of some quantity varies inversely as a power of that value, the quantity is said to follow a power law, also known variously as Zipf's law or the Pareto distribution. Power laws appear widely in physics, biology, earth and planetary sciences, economics and finance, computer science, demography and the social sciences. For instance, the distributions of the sizes of cities, earthquakes, solar flares, moon craters, wars and people's personal fortunes all appear to follow power laws. The origin of power-law behaviour has been a topic of debate in the scientific community for more than a century. Here we review some of the empirical evidence for the existence of power-law forms and the theories proposed to explain them.}
}
%0 = misc
%A = Newman, M. E. J.
%B =
%C =
%D = 2004
%I =
%T = Power laws, Pareto distributions and Zipf's law
%U = http://arxiv.org/abs/cond-mat/0412004
|
|
J |
Adamic, L. A. & Huberman, B. A.
(2002):
Zipf's Law and the Internet.
In: Glottometrics,
Vol. 3,
Erscheinungsjahr/Year: 2002.
Seiten/Pages: 143-150.
[BibTeX]
[Endnote]
@article{adamic02zipf,
  author   = {Adamic, L. A. and Huberman, B. A.},
  title    = {{Zipf's} Law and the {Internet}},
  journal  = {Glottometrics},
  year     = {2002},
  volume   = {3},
  pages    = {143--150},
  keywords = {Zipf, flat, law, long, pareto, power, powerlaw, tail}
}
%0 = article
%A = Adamic, L. A. and Huberman, B. A.
%D = 2002
%T = Zipf's Law and the Internet
|
Adamic, L.
(2002):
Zipf, Power-laws, and Pareto - a ranking tutorial .
[Volltext] [BibTeX] [Endnote]
@misc{adamic02tutorial,
  author   = {Adamic, Lada},
  title    = {{Zipf}, Power-laws, and {Pareto} -- a ranking tutorial},
  year     = {2002},
  url      = {http://www.hpl.hp.com/research/idl/papers/ranking/ranking.html},
  keywords = {Pareto, Power, Zipf, law, long, powerlaw, tail}
}
%0 = misc
%A = Adamic, Lada
%B =
%C =
%D = 2002
%I =
%T = Zipf, Power-laws, and Pareto -- a ranking tutorial
%U = http://www.hpl.hp.com/research/idl/papers/ranking/ranking.html
|
|
J |
Pennock, D.; Flake, G.; Lawrence, S.; Glover, E. & Giles, C. L.
(2002):
Winners don't take all: Characterizing the
competition for links on the web. In: Proc.National Academy of Sciences, Ausgabe/Number: 8, Vol. 99, Erscheinungsjahr/Year: 2002. Seiten/Pages: 5207-5211. [BibTeX] [Endnote]
@article{pennock2002winners,
  author   = {Pennock, David and Flake, Gary and Lawrence, Steve and Glover, Eric and Giles, C. Lee},
  title    = {Winners don't take all: Characterizing the competition for links on the web},
  journal  = {Proceedings of the National Academy of Sciences},
  year     = {2002},
  volume   = {99},
  number   = {8},
  pages    = {5207--5211},
  keywords = {community, degree, distribution, model, powerlaw, smallworld}
}
%0 = article
%A = Pennock, David and Flake, Gary and Lawrence, Steve and Glover, Eric and Giles, C. Lee
%D = 2002
%T = Winners don't take all: Characterizing the
competition for links on the web |
J |
Vuong, Q. H.
(1989):
Likelihood Ratio Tests for Model Selection and Non-Nested Hypotheses.
In: Econometrica,
Ausgabe/Number: 2,
Vol. 57,
Verlag/Publisher: The Econometric Society.
Erscheinungsjahr/Year: 1989.
Seiten/Pages: pp. 307-333.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
In this paper, we develop a classical approach to model selection. Using the Kullback-Leibler Information Criterion to measure the closeness of a model to the truth, we propose simple likelihood-ratio based statistics for testing the null hypothesis that the competing models are equally close to the true data generating process against the alternative hypothesis that one model is closer. The tests are directional and are derived successively for the cases where the competing models are non-nested, overlapping, or nested and whether both, one, or neither is misspecified. As a prerequisite, we fully characterize the asymptotic distribution of the likelihood ratio statistic under the most general conditions. We show that it is a weighted sum of chi-square distribution or a normal distribution depending on whether the distributions in the competing models closest to the truth are observationally identical. We also propose a test of this latter condition.
@article{vuong1989likelihood,
  author    = {Vuong, Quang H.},
  title     = {Likelihood Ratio Tests for Model Selection and Non-Nested Hypotheses},
  journal   = {Econometrica},
  publisher = {The Econometric Society},
  year      = {1989},
  volume    = {57},
  number    = {2},
  pages     = {307--333},
  url       = {http://www.jstor.org/stable/1912557},
  issn      = {0012-9682},
  keywords  = {comparision, hypothesis, likelihood, powerLaw, testing},
  abstract  = {In this paper, we develop a classical approach to model selection. Using the Kullback-Leibler Information Criterion to measure the closeness of a model to the truth, we propose simple likelihood-ratio based statistics for testing the null hypothesis that the competing models are equally close to the true data generating process against the alternative hypothesis that one model is closer. The tests are directional and are derived successively for the cases where the competing models are non-nested, overlapping, or nested and whether both, one, or neither is misspecified. As a prerequisite, we fully characterize the asymptotic distribution of the likelihood ratio statistic under the most general conditions. We show that it is a weighted sum of chi-square distribution or a normal distribution depending on whether the distributions in the competing models closest to the truth are observationally identical. We also propose a test of this latter condition.}
}
%0 = article
%A = Vuong, Quang H.
%D = 1989
%I = The Econometric Society
%T = Likelihood Ratio Tests for Model Selection and Non-Nested Hypotheses
%U = http://www.jstor.org/stable/1912557
|