Clauset, A.; Shalizi, C. R. & Newman, M. E. J.: Power-Law Distributions in Empirical Data. In: SIAM Review 51 (2009), Nr. 4, S. 661-703
[Volltext]
@article{clauset2009powerlaw,
  author   = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.},
  title    = {Power-Law Distributions in Empirical Data},
  journal  = {SIAM Review},
  year     = {2009},
  volume   = {51},
  number   = {4},
  pages    = {661--703},
  url      = {http://dx.doi.org/10.1137/070710111},
  doi      = {10.1137/070710111},
  keywords = {clauset, fit, empirical, powerLaw}
}
Clauset, A.; Shalizi, C. R. & Newman, M. E. J.: Power-Law Distributions in Empirical Data. In: SIAM Review 51 (2009), Nr. 4, S. 661-703
[Volltext]
Power-law distributions occur in many situations of scientific interest and have significant consequences for our understanding of natural and man-made phenomena. Unfortunately, the detection and characterization of power laws is complicated by the large fluctuations that occur in the tail of the distribution—the part of the distribution representing large but rare events—and by the difficulty of identifying the range over which power-law behavior holds. Commonly used methods for analyzing power-law data, such as least-squares fitting, can produce substantially inaccurate estimates of parameters for power-law distributions, and even in cases where such methods return accurate answers they are still unsatisfactory because they give no indication of whether the data obey a power law at all. Here we present a principled statistical framework for discerning and quantifying power-law behavior in empirical data. Our approach combines maximum-likelihood fitting methods with goodness-of-fit tests based on the Kolmogorov–Smirnov (KS) statistic and likelihood ratios. We evaluate the effectiveness of the approach with tests on synthetic data and give critical comparisons to previous approaches. We also apply the proposed methods to twenty-four real-world data sets from a range of different disciplines, each of which has been conjectured to follow a power-law distribution. In some cases we find these conjectures to be consistent with the data, while in others the power law is ruled out.
@article{clauset2009powerlaw,
  author    = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.},
  title     = {Power-Law Distributions in Empirical Data},
  journal   = {SIAM Review},
  publisher = {SIAM},
  year      = {2009},
  volume    = {51},
  number    = {4},
  pages     = {661--703},
  url       = {http://link.aip.org/link/?SIR/51/661/1},
  doi       = {10.1137/070710111},
  keywords  = {law, power, powerlaw},
  abstract  = {Power-law distributions occur in many situations of scientific interest and have significant consequences for our understanding of natural and man-made phenomena. Unfortunately, the detection and characterization of power laws is complicated by the large fluctuations that occur in the tail of the distribution—the part of the distribution representing large but rare events—and by the difficulty of identifying the range over which power-law behavior holds. Commonly used methods for analyzing power-law data, such as least-squares fitting, can produce substantially inaccurate estimates of parameters for power-law distributions, and even in cases where such methods return accurate answers they are still unsatisfactory because they give no indication of whether the data obey a power law at all. Here we present a principled statistical framework for discerning and quantifying power-law behavior in empirical data. Our approach combines maximum-likelihood fitting methods with goodness-of-fit tests based on the Kolmogorov–Smirnov (KS) statistic and likelihood ratios. We evaluate the effectiveness of the approach with tests on synthetic data and give critical comparisons to previous approaches. We also apply the proposed methods to twenty-four real-world data sets from a range of different disciplines, each of which has been conjectured to follow a power-law distribution. In some cases we find these conjectures to be consistent with the data, while in others the power law is ruled out.}
}
Clauset, A.; Shalizi, C. R. & Newman, M. E. J.: Power-Law Distributions in Empirical Data. In: SIAM Review 51 (2009), Nr. 4, S. 661-703
[Volltext]
Power-law distributions occur in many situations of scientific interest and have significant consequences for our understanding of natural and man-made phenomena. Unfortunately, the detection and characterization of power laws is complicated by the large fluctuations that occur in the tail of the distribution—the part of the distribution representing large but rare events—and by the difficulty of identifying the range over which power-law behavior holds. Commonly used methods for analyzing power-law data, such as least-squares fitting, can produce substantially inaccurate estimates of parameters for power-law distributions, and even in cases where such methods return accurate answers they are still unsatisfactory because they give no indication of whether the data obey a power law at all. Here we present a principled statistical framework for discerning and quantifying power-law behavior in empirical data. Our approach combines maximum-likelihood fitting methods with goodness-of-fit tests based on the Kolmogorov–Smirnov (KS) statistic and likelihood ratios. We evaluate the effectiveness of the approach with tests on synthetic data and give critical comparisons to previous approaches. We also apply the proposed methods to twenty-four real-world data sets from a range of different disciplines, each of which has been conjectured to follow a power-law distribution. In some cases we find these conjectures to be consistent with the data, while in others the power law is ruled out.
@article{clauset2009powerlaw,
  author    = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.},
  title     = {Power-Law Distributions in Empirical Data},
  journal   = {SIAM Review},
  publisher = {SIAM},
  year      = {2009},
  volume    = {51},
  number    = {4},
  pages     = {661--703},
  url       = {http://link.aip.org/link/?SIR/51/661/1},
  doi       = {10.1137/070710111},
  keywords  = {power-law, statistics, analysis, data},
  abstract  = {Power-law distributions occur in many situations of scientific interest and have significant consequences for our understanding of natural and man-made phenomena. Unfortunately, the detection and characterization of power laws is complicated by the large fluctuations that occur in the tail of the distribution—the part of the distribution representing large but rare events—and by the difficulty of identifying the range over which power-law behavior holds. Commonly used methods for analyzing power-law data, such as least-squares fitting, can produce substantially inaccurate estimates of parameters for power-law distributions, and even in cases where such methods return accurate answers they are still unsatisfactory because they give no indication of whether the data obey a power law at all. Here we present a principled statistical framework for discerning and quantifying power-law behavior in empirical data. Our approach combines maximum-likelihood fitting methods with goodness-of-fit tests based on the Kolmogorov–Smirnov (KS) statistic and likelihood ratios. We evaluate the effectiveness of the approach with tests on synthetic data and give critical comparisons to previous approaches. We also apply the proposed methods to twenty-four real-world data sets from a range of different disciplines, each of which has been conjectured to follow a power-law distribution. In some cases we find these conjectures to be consistent with the data, while in others the power law is ruled out.}
}
Clauset, A.; Shalizi, C. R. & Newman, M. E. J.: Power-law distributions in empirical data. , 2007
[Volltext]
Power-law distributions occur in many situations of scientific interest and
have significant consequences for our understanding of natural and man-made
phenomena. Unfortunately, the detection and characterization of power laws is
complicated by the large fluctuations that occur in the tail of the
distribution - the part of the distribution representing large but rare events
- and by the difficulty of identifying the range over which power-law behavior
holds. Commonly used methods for analyzing power-law data, such as
least-squares fitting, can produce substantially inaccurate estimates of
parameters for power-law distributions, and even in cases where such methods
return accurate answers they are still unsatisfactory because they give no
indication of whether the data obey a power law at all. Here we present a
principled statistical framework for discerning and quantifying power-law
behavior in empirical data. Our approach combines maximum-likelihood fitting
methods with goodness-of-fit tests based on the Kolmogorov-Smirnov statistic
and likelihood ratios. We evaluate the effectiveness of the approach with tests
on synthetic data and give critical comparisons to previous approaches. We also
apply the proposed methods to twenty-four real-world data sets from a range of
different disciplines, each of which has been conjectured to follow a power-law
distribution. In some cases we find these conjectures to be consistent with the
data while in others the power law is ruled out.
@misc{clauset2007powerlaw,
  author   = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.},
  title    = {Power-law distributions in empirical data},
  year     = {2007},
  note     = {arXiv:0706.1062. Comment: 43 pages, 11 figures, 7 tables, 4 appendices; code available at http://www.santafe.edu/~aaronc/powerlaws/},
  url      = {http://arxiv.org/abs/0706.1062},
  doi      = {10.1137/070710111},
  keywords = {distributions, law, distribution, empirical, power, data, powerlaw},
  abstract = {Power-law distributions occur in many situations of scientific interest and
have significant consequences for our understanding of natural and man-made
phenomena. Unfortunately, the detection and characterization of power laws is
complicated by the large fluctuations that occur in the tail of the
distribution -- the part of the distribution representing large but rare events
-- and by the difficulty of identifying the range over which power-law behavior
holds. Commonly used methods for analyzing power-law data, such as
least-squares fitting, can produce substantially inaccurate estimates of
parameters for power-law distributions, and even in cases where such methods
return accurate answers they are still unsatisfactory because they give no
indication of whether the data obey a power law at all. Here we present a
principled statistical framework for discerning and quantifying power-law
behavior in empirical data. Our approach combines maximum-likelihood fitting
methods with goodness-of-fit tests based on the Kolmogorov-Smirnov statistic
and likelihood ratios. We evaluate the effectiveness of the approach with tests
on synthetic data and give critical comparisons to previous approaches. We also
apply the proposed methods to twenty-four real-world data sets from a range of
different disciplines, each of which has been conjectured to follow a power-law
distribution. In some cases we find these conjectures to be consistent with the
data while in others the power law is ruled out.}
}
Clauset, A.; Shalizi, C. R. & Newman, M. E. J.: Power-law distributions in empirical data. , 2007
[Volltext]
Power-law distributions occur in many situations of scientific interest and
have significant consequences for our understanding of natural and man-made
phenomena. Unfortunately, the detection and characterization of power laws is
complicated by the large fluctuations that occur in the tail of the
distribution - the part of the distribution representing large but rare events
- and by the difficulty of identifying the range over which power-law behavior
holds. Commonly used methods for analyzing power-law data, such as
least-squares fitting, can produce substantially inaccurate estimates of
parameters for power-law distributions, and even in cases where such methods
return accurate answers they are still unsatisfactory because they give no
indication of whether the data obey a power law at all. Here we present a
principled statistical framework for discerning and quantifying power-law
behavior in empirical data. Our approach combines maximum-likelihood fitting
methods with goodness-of-fit tests based on the Kolmogorov-Smirnov statistic
and likelihood ratios. We evaluate the effectiveness of the approach with tests
on synthetic data and give critical comparisons to previous approaches. We also
apply the proposed methods to twenty-four real-world data sets from a range of
different disciplines, each of which has been conjectured to follow a power-law
distribution. In some cases we find these conjectures to be consistent with the
data while in others the power law is ruled out.
@misc{Clauset2007,
  author   = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.},
  title    = {Power-law distributions in empirical data},
  year     = {2007},
  note     = {arXiv:0706.1062.
Comment: 43 pages, 11 figures, 7 tables, 4 appendices; code available at
http://www.santafe.edu/~aaronc/powerlaws/},
  url      = {http://arxiv.org/abs/0706.1062},
  keywords = {fit, law, analysis, power, data},
  abstract = { Power-law distributions occur in many situations of scientific interest and
have significant consequences for our understanding of natural and man-made
phenomena. Unfortunately, the detection and characterization of power laws is
complicated by the large fluctuations that occur in the tail of the
distribution -- the part of the distribution representing large but rare events
-- and by the difficulty of identifying the range over which power-law behavior
holds. Commonly used methods for analyzing power-law data, such as
least-squares fitting, can produce substantially inaccurate estimates of
parameters for power-law distributions, and even in cases where such methods
return accurate answers they are still unsatisfactory because they give no
indication of whether the data obey a power law at all. Here we present a
principled statistical framework for discerning and quantifying power-law
behavior in empirical data. Our approach combines maximum-likelihood fitting
methods with goodness-of-fit tests based on the Kolmogorov-Smirnov statistic
and likelihood ratios. We evaluate the effectiveness of the approach with tests
on synthetic data and give critical comparisons to previous approaches. We also
apply the proposed methods to twenty-four real-world data sets from a range of
different disciplines, each of which has been conjectured to follow a power-law
distribution. In some cases we find these conjectures to be consistent with the
data while in others the power law is ruled out.
}
}
Clauset, A.; Newman, M. E. J. & Moore, C.: Finding community structure in very large networks. In: Physical Review E (2004), S. 1- 6
Abstract: The discovery and analysis of community structure in networks is a topic of considerable recent interest within the physics community, but most methods proposed so far are unsuitable for very large networks because of their computational cost. Here we present a hierarchical agglomeration algorithm for detecting community structure which is faster than many competing algorithms: its running time on a network with n vertices and m edges is O(m d log n) where d is the depth of the dendrogram describing the community structure. Many real-world networks are sparse and hierarchical, with m ~ n and d ~ log n, in which case our algorithm runs in essentially linear time, O(n log^2 n). As an example of the application of this algorithm we use it to analyze a network of items for sale on the web-site of a large online retailer, items in the network being linked if they are frequently purchased by the same buyer. The network has more than 400 000 vertices and 2 million edges. We show that our algorithm can extract meaningful communities from this network, revealing large-scale patterns present in the purchasing habits of customers.
@article{clauset2004,
  author   = {Clauset, Aaron and Newman, M. E. J. and Moore, Cristopher},
  title    = {Finding community structure in very large networks},
  journal  = {Physical Review E},
  year     = {2004},
  volume   = {70},
  pages    = {066111},
  doi      = {10.1103/PhysRevE.70.066111},
  keywords = {2014, kde, bachelorarbeit},
  abstract = {The discovery and analysis of community structure in networks is a topic of considerable recent interest within the physics community, but most methods proposed so far are unsuitable for very large networks because of their computational cost. Here we present a hierarchical agglomeration algorithm for detecting community structure which is faster than many competing algorithms: its running time on a network with $n$ vertices and $m$ edges is $O(m d \log n)$ where $d$ is the depth of the dendrogram describing the community structure. Many real-world networks are sparse and hierarchical, with $m \sim n$ and $d \sim \log n$, in which case our algorithm runs in essentially linear time, $O(n \log^2 n)$. As an example of the application of this algorithm we use it to analyze a network of items for sale on the web-site of a large online retailer, items in the network being linked if they are frequently purchased by the same buyer. The network has more than 400 000 vertices and 2 million edges. We show that our algorithm can extract meaningful communities from this network, revealing large-scale patterns present in the purchasing habits of customers.}
}
Clauset, A.; Newman, M. E. J. & Moore, C.: Finding community structure in very large networks. In: Physical Review E 70 (2004), S. 066111
[Volltext]
@article{clauset-2004-70,
  author   = {Clauset, Aaron and Newman, M. E. J. and Moore, Cristopher},
  title    = {Finding community structure in very large networks},
  journal  = {Physical Review E},
  year     = {2004},
  volume   = {70},
  pages    = {066111},
  doi      = {10.1103/PhysRevE.70.066111},
  url      = {http://www.citebase.org/abstract?id=oai:arXiv.org:cond-mat/0408187},
  keywords = {networks, clustering, large, community, toread}
}
Clauset, A.; Newman, M. & Moore, C.: Finding community structure in very large networks. In: Physical Review E 70 (2004), S. 066111
[Volltext]
@article{clauset-2004-70,
  author   = {Clauset, Aaron and Newman, M. E. J. and Moore, Cristopher},
  title    = {Finding community structure in very large networks},
  journal  = {Physical Review E},
  year     = {2004},
  volume   = {70},
  pages    = {066111},
  doi      = {10.1103/PhysRevE.70.066111},
  url      = {http://www.citebase.org/cgi-bin/citations?id=oai:arXiv.org:cond-mat/0408187},
  keywords = {detection, newman, large, community, network, structure, gn}
}
Clauset, A.; Newman, M. E. J. & Moore, C.: Finding community structure in very large networks. , 2004
[Volltext]
The discovery and analysis of community structure in networks is a topic of
considerable recent interest within the physics community, but most methods
proposed so far are unsuitable for very large networks because of their
computational cost. Here we present a hierarchical agglomeration algorithm for
detecting community structure which is faster than many competing algorithms:
its running time on a network with n vertices and m edges is O(m d log n) where
d is the depth of the dendrogram describing the community structure. Many
real-world networks are sparse and hierarchical, with m ~ n and d ~ log n, in
which case our algorithm runs in essentially linear time, O(n log^2 n). As an
example of the application of this algorithm we use it to analyze a network of
items for sale on the web-site of a large online retailer, items in the network
being linked if they are frequently purchased by the same buyer. The network
has more than 400,000 vertices and 2 million edges. We show that our algorithm
can extract meaningful communities from this network, revealing large-scale
patterns present in the purchasing habits of customers.
@misc{citeulike:95936,
  author   = {Clauset, Aaron and Newman, M. E. J. and Moore, Cristopher},
  title    = {Finding community structure in very large networks},
  year     = {2004},
  url      = {http://arxiv.org/abs/cond-mat/0408187},
  keywords = {clustering, large, community, network},
  abstract = {The discovery and analysis of community structure in networks is a topic of
considerable recent interest within the physics community, but most methods
proposed so far are unsuitable for very large networks because of their
computational cost. Here we present a hierarchical agglomeration algorithm for
detecting community structure which is faster than many competing algorithms:
its running time on a network with $n$ vertices and $m$ edges is $O(m d \log n)$ where
$d$ is the depth of the dendrogram describing the community structure. Many
real-world networks are sparse and hierarchical, with $m \sim n$ and $d \sim \log n$, in
which case our algorithm runs in essentially linear time, $O(n \log^2 n)$. As an
example of the application of this algorithm we use it to analyze a network of
items for sale on the web-site of a large online retailer, items in the network
being linked if they are frequently purchased by the same buyer. The network
has more than 400,000 vertices and 2 million edges. We show that our algorithm
can extract meaningful communities from this network, revealing large-scale
patterns present in the purchasing habits of customers.}
}