% Bibliography: Clauset et al., power-law fitting and community detection.
%
% Maintenance notes (text outside entries is ignored by BibTeX):
%   * Each citation key is defined exactly once. Former repeated definitions
%     of clauset2009powerlaw (three copies) and clauset-2004-70 (two copies)
%     were merged; the URL of a superseded copy is kept in the ignored
%     field "alt-url".
%   * interhash, intrahash, citeulike-article-id, priority, numpages and
%     alt-url are bookkeeping fields that standard styles do not print.
%   * Abstracts use ASCII dashes and math mode so they typeset safely if a
%     style chooses to print them.
%   * Several keys intentionally refer to the same work (journal version and
%     arXiv preprint); all keys are kept so existing citations still resolve.

% Journal version of the power-law paper (SIAM Review 51(4), 2009).
@article{clauset2009powerlaw,
  author    = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.},
  title     = {Power-Law Distributions in Empirical Data},
  journal   = {SIAM Review},
  volume    = {51},
  number    = {4},
  pages     = {661--703},
  year      = {2009},
  publisher = {SIAM},
  issn      = {0036-1445},
  doi       = {10.1137/070710111},
  url       = {http://dx.doi.org/10.1137/070710111},
  alt-url   = {http://link.aip.org/link/?SIR/51/661/1},
  abstract  = {Power-law distributions occur in many situations of scientific
               interest and have significant consequences for our
               understanding of natural and man-made phenomena.
               Unfortunately, the detection and characterization of power
               laws is complicated by the large fluctuations that occur in
               the tail of the distribution---the part of the distribution
               representing large but rare events---and by the difficulty of
               identifying the range over which power-law behavior holds.
               Commonly used methods for analyzing power-law data, such as
               least-squares fitting, can produce substantially inaccurate
               estimates of parameters for power-law distributions, and even
               in cases where such methods return accurate answers they are
               still unsatisfactory because they give no indication of
               whether the data obey a power law at all. Here we present a
               principled statistical framework for discerning and
               quantifying power-law behavior in empirical data. Our approach
               combines maximum-likelihood fitting methods with
               goodness-of-fit tests based on the Kolmogorov--Smirnov (KS)
               statistic and likelihood ratios. We evaluate the effectiveness
               of the approach with tests on synthetic data and give critical
               comparisons to previous approaches. We also apply the proposed
               methods to twenty-four real-world data sets from a range of
               different disciplines, each of which has been conjectured to
               follow a power-law distribution. In some cases we find these
               conjectures to be consistent with the data, while in others
               the power law is ruled out.},
  interhash = {9ce8658af5a6358a758bfdb819f73394},
  intrahash = {c0097d202655474b1db6811ddea03410},
}

% arXiv preprint of the power-law paper (arXiv:0706.1062).
% The doi field is carried over unchanged from the original record; it is the
% same DOI as the journal version above.
@misc{clauset2007powerlaw,
  author        = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.},
  title         = {Power-law distributions in empirical data},
  year          = {2007},
  eprint        = {0706.1062},
  archiveprefix = {arXiv},
  doi           = {10.1137/070710111},
  url           = {http://arxiv.org/abs/0706.1062},
  note          = {43 pages, 11 figures, 7 tables, 4 appendices; code
                   available at
                   http://www.santafe.edu/\textasciitilde{}aaronc/powerlaws/},
  abstract      = {Power-law distributions occur in many situations of
                   scientific interest and have significant consequences for
                   our understanding of natural and man-made phenomena.
                   Unfortunately, the detection and characterization of power
                   laws is complicated by the large fluctuations that occur
                   in the tail of the distribution -- the part of the
                   distribution representing large but rare events -- and by
                   the difficulty of identifying the range over which
                   power-law behavior holds. Commonly used methods for
                   analyzing power-law data, such as least-squares fitting,
                   can produce substantially inaccurate estimates of
                   parameters for power-law distributions, and even in cases
                   where such methods return accurate answers they are still
                   unsatisfactory because they give no indication of whether
                   the data obey a power law at all. Here we present a
                   principled statistical framework for discerning and
                   quantifying power-law behavior in empirical data. Our
                   approach combines maximum-likelihood fitting methods with
                   goodness-of-fit tests based on the Kolmogorov-Smirnov
                   statistic and likelihood ratios. We evaluate the
                   effectiveness of the approach with tests on synthetic data
                   and give critical comparisons to previous approaches. We
                   also apply the proposed methods to twenty-four real-world
                   data sets from a range of different disciplines, each of
                   which has been conjectured to follow a power-law
                   distribution. In some cases we find these conjectures to
                   be consistent with the data while in others the power law
                   is ruled out.},
  interhash     = {2e3bc5bbd7449589e8bfb580e8936d4b},
  intrahash     = {7da1624e601898dd74df839ce2daeb24},
}

% Journal version of the community-structure paper (Phys. Rev. E 70, 066111).
% pages holds the article number, matching the DOI and the clauset-2004-70
% entry; the former in-article range 1--6 is recorded as numpages.
@article{clauset2004,
  author    = {Clauset, Aaron and Newman, M. E. J. and Moore, Cristopher},
  title     = {Finding community structure in very large networks},
  journal   = {Physical Review E},
  volume    = {70},
  pages     = {066111},
  numpages  = {6},
  year      = {2004},
  doi       = {10.1103/PhysRevE.70.066111},
  abstract  = {The discovery and analysis of community structure in networks
               is a topic of considerable recent interest within the physics
               community, but most methods proposed so far are unsuitable for
               very large networks because of their computational cost. Here
               we present a hierarchical agglomeration algorithm for
               detecting community structure which is faster than many
               competing algorithms: its running time on a network with $n$
               vertices and $m$ edges is $O(m d \log n)$ where $d$ is the
               depth of the dendrogram describing the community structure.
               Many real-world networks are sparse and hierarchical, with
               $m \sim n$ and $d \sim \log n$, in which case our algorithm
               runs in essentially linear time, $O(n \log^2 n)$. As an
               example of the application of this algorithm we use it to
               analyze a network of items for sale on the web-site of a large
               online retailer, items in the network being linked if they are
               frequently purchased by the same buyer. The network has more
               than 400,000 vertices and 2 million edges. We show that our
               algorithm can extract meaningful communities from this
               network, revealing large-scale patterns present in the
               purchasing habits of customers.},
  interhash = {69be2649d5ff3e66ad7dfadac4a1841f},
  intrahash = {458e03efb1ef50a5338907bb58c426f6},
}

% Second key for the arXiv preprint of the power-law paper (arXiv:0706.1062).
@misc{Clauset2007,
  author        = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.},
  title         = {Power-law distributions in empirical data},
  year          = {2007},
  eprint        = {0706.1062},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/0706.1062},
  note          = {43 pages, 11 figures, 7 tables, 4 appendices; code
                   available at
                   http://www.santafe.edu/\textasciitilde{}aaronc/powerlaws/},
  abstract      = {Power-law distributions occur in many situations of
                   scientific interest and have significant consequences for
                   our understanding of natural and man-made phenomena.
                   Unfortunately, the detection and characterization of power
                   laws is complicated by the large fluctuations that occur
                   in the tail of the distribution -- the part of the
                   distribution representing large but rare events -- and by
                   the difficulty of identifying the range over which
                   power-law behavior holds. Commonly used methods for
                   analyzing power-law data, such as least-squares fitting,
                   can produce substantially inaccurate estimates of
                   parameters for power-law distributions, and even in cases
                   where such methods return accurate answers they are still
                   unsatisfactory because they give no indication of whether
                   the data obey a power law at all. Here we present a
                   principled statistical framework for discerning and
                   quantifying power-law behavior in empirical data. Our
                   approach combines maximum-likelihood fitting methods with
                   goodness-of-fit tests based on the Kolmogorov-Smirnov
                   statistic and likelihood ratios. We evaluate the
                   effectiveness of the approach with tests on synthetic data
                   and give critical comparisons to previous approaches. We
                   also apply the proposed methods to twenty-four real-world
                   data sets from a range of different disciplines, each of
                   which has been conjectured to follow a power-law
                   distribution. In some cases we find these conjectures to
                   be consistent with the data while in others the power law
                   is ruled out.},
  interhash     = {2e3bc5bbd7449589e8bfb580e8936d4b},
  intrahash     = {7da1624e601898dd74df839ce2daeb24},
}

% Second key for the journal version of the community-structure paper.
% Merged from two definitions of this key; the first definition's url and
% intrahash are kept, the second definition's url is stored in alt-url.
% The doi is the one recorded for the same article under clauset2004.
@article{clauset-2004-70,
  author    = {Clauset, Aaron and Newman, M. E. J. and Moore, Cristopher},
  title     = {Finding community structure in very large networks},
  journal   = {Physical Review E},
  volume    = {70},
  pages     = {066111},
  year      = {2004},
  doi       = {10.1103/PhysRevE.70.066111},
  url       = {http://www.citebase.org/abstract?id=oai:arXiv.org:cond-mat/0408187},
  alt-url   = {http://www.citebase.org/cgi-bin/citations?id=oai:arXiv.org:cond-mat/0408187},
  interhash = {2c68e3c981a00380692a3b0b661d7cfd},
  intrahash = {0ea285bfc0f5a46ffec8a213e5133ba6},
}

% arXiv preprint of the community-structure paper (arXiv:cond-mat/0408187).
@misc{citeulike:95936,
  author               = {Clauset, Aaron and Newman, M. E. J. and Moore, Cristopher},
  title                = {Finding community structure in very large networks},
  year                 = {2004},
  month                = aug,
  eprint               = {cond-mat/0408187},
  archiveprefix        = {arXiv},
  url                  = {http://arxiv.org/abs/cond-mat/0408187},
  abstract             = {The discovery and analysis of community structure
                          in networks is a topic of considerable recent
                          interest within the physics community, but most
                          methods proposed so far are unsuitable for very
                          large networks because of their computational cost.
                          Here we present a hierarchical agglomeration
                          algorithm for detecting community structure which
                          is faster than many competing algorithms: its
                          running time on a network with $n$ vertices and $m$
                          edges is $O(m d \log n)$ where $d$ is the depth of
                          the dendrogram describing the community structure.
                          Many real-world networks are sparse and
                          hierarchical, with $m \sim n$ and $d \sim \log n$,
                          in which case our algorithm runs in essentially
                          linear time, $O(n \log^2 n)$. As an example of the
                          application of this algorithm we use it to analyze
                          a network of items for sale on the web-site of a
                          large online retailer, items in the network being
                          linked if they are frequently purchased by the same
                          buyer. The network has more than 400,000 vertices
                          and 2 million edges. We show that our algorithm can
                          extract meaningful communities from this network,
                          revealing large-scale patterns present in the
                          purchasing habits of customers.},
  citeulike-article-id = {95936},
  priority             = {0},
  interhash            = {2c68e3c981a00380692a3b0b661d7cfd},
  intrahash            = {f9a12630a6d31d576ea5222219a4cf0b},
}