@article{newman2004finding, author = {Newman, M. E. J. and Girvan, M.}, doi = {10.1103/PhysRevE.69.026113}, interhash = {b9145040e35ccb4d2a0ce18105e64ff4}, intrahash = {1dbc30a1818aa74973f387162e485443}, journal = {Phys. Rev. E}, month = feb, number = 2, numpages = {15}, pages = 026113, publisher = {American Physical Society}, title = {Finding and evaluating community structure in networks}, url = {http://link.aps.org/doi/10.1103/PhysRevE.69.026113}, volume = 69, year = 2004 } @article{clauset2009powerlaw, author = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.}, doi = {10.1137/070710111}, eprint = {http://dx.doi.org/10.1137/070710111}, interhash = {9ce8658af5a6358a758bfdb819f73394}, intrahash = {c0097d202655474b1db6811ddea03410}, journal = {SIAM Review}, number = 4, pages = {661-703}, title = {Power-Law Distributions in Empirical Data}, url = {/brokenurl# http://dx.doi.org/10.1137/070710111 }, volume = 51, year = 2009 } @misc{clauset2007powerlaw, abstract = {Power-law distributions occur in many situations of scientific interest and have significant consequences for our understanding of natural and man-made phenomena. Unfortunately, the detection and characterization of power laws is complicated by the large fluctuations that occur in the tail of the distribution -- the part of the distribution representing large but rare events -- and by the difficulty of identifying the range over which power-law behavior holds. Commonly used methods for analyzing power-law data, such as least-squares fitting, can produce substantially inaccurate estimates of parameters for power-law distributions, and even in cases where such methods return accurate answers they are still unsatisfactory because they give no indication of whether the data obey a power law at all. Here we present a principled statistical framework for discerning and quantifying power-law behavior in empirical data. Our approach combines maximum-likelihood fitting methods with goodness-of-fit tests based on the Kolmogorov-Smirnov statistic and likelihood ratios. We evaluate the effectiveness of the approach with tests on synthetic data and give critical comparisons to previous approaches. We also apply the proposed methods to twenty-four real-world data sets from a range of different disciplines, each of which has been conjectured to follow a power-law distribution. In some cases we find these conjectures to be consistent with the data while in others the power law is ruled out.}, author = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.}, doi = {10.1137/070710111}, interhash = {2e3bc5bbd7449589e8bfb580e8936d4b}, intrahash = {7da1624e601898dd74df839ce2daeb24}, note = {cite arxiv:0706.1062Comment: 43 pages, 11 figures, 7 tables, 4 appendices; code available at http://www.santafe.edu/~aaronc/powerlaws/}, title = {Power-law distributions in empirical data}, url = {http://arxiv.org/abs/0706.1062}, year = 2007 } @article{PhysRevE.64.016131, author = {Newman, M. E. J.}, doi = {10.1103/PhysRevE.64.016131}, interhash = {c2e3ef110ba67dd66249c354725aa680}, intrahash = {c4ec4bf95bf426882af0061bee863511}, journal = {Phys. Rev. E}, month = jun, number = 1, numpages = {8}, pages = 016131, publisher = {American Physical Society}, title = {Scientific collaboration networks. I. Network construction and fundamental results}, url = {http://link.aps.org/doi/10.1103/PhysRevE.64.016131}, volume = 64, year = 2001 } @article{NewGir04, author = {Newman, M. E. J. and Girvan, M.}, interhash = {b9145040e35ccb4d2a0ce18105e64ff4}, intrahash = {63176454110326cb664a25fc249c8f7b}, journal = {Physical Review}, number = 026113, title = {Finding and evaluating community structure in networks}, volume = {E 69}, year = 2004 } @article{clauset2004, abstract = {Abstract: The discovery and analysis of community structure in networks is a topic of considerable recent interest within the physics community, but most methods proposed so far are unsuitable for very large networks because of their computational cost. Here we present a hierarchical agglomeration algorithm for detecting community structure which is faster than many competing algorithms: its running time on a network with n vertices and m edges is O(mdlog n) where d is the depth of the dendrogram describing the community structure. Many real-world networks are sparse and hierarchical, with m n and d log n, in which case our algorithm runs in essentially linear time, O(n log2 n). As an example of the application of this algorithm we use it to analyze a network of items for sale on the web-site of a large online retailer, items in the network being linked if they are frequently purchased by the same buyer. The network has more than 400 000 vertices and 2 million edges. We show that our algorithm can extract meaningful communities from this network, revealing large-scale patterns present in the purchasing habits of customers.}, author = {Clauset, Aaron and Newman, M. E. J. and and Cristopher Moore}, doi = {10.1103/PhysRevE.70.066111}, interhash = {69be2649d5ff3e66ad7dfadac4a1841f}, intrahash = {458e03efb1ef50a5338907bb58c426f6}, journal = {Physical Review E}, pages = {1-- 6}, title = {Finding community structure in very large networks}, year = 2004 } @article{girvan2002, author = {Girvan, M. and Newman, M. E. J.}, file = {full paper:Girvan2002.pdf:PDF}, groups = {public}, interhash = {ecd7a48a37f660ab421472140168c892}, intrahash = {da908a587645fb450af65cbbb63323a7}, journal = {Proceedings of the National Academy of Sciences}, number = 12, pages = {7821-7826}, title = {Community structure in social and biological networks}, username = {lantiq}, volume = 99, year = 2002 } @article{clauset2009powerlaw, abstract = {Power-law distributions occur in many situations of scientific interest and have significant consequences for our understanding of natural and man-made phenomena. Unfortunately, the detection and characterization of power laws is complicated by the large fluctuations that occur in the tail of the distribution—the part of the distribution representing large but rare events—and by the difficulty of identifying the range over which power-law behavior holds. Commonly used methods for analyzing power-law data, such as least-squares fitting, can produce substantially inaccurate estimates of parameters for power-law distributions, and even in cases where such methods return accurate answers they are still unsatisfactory because they give no indication of whether the data obey a power law at all. Here we present a principled statistical framework for discerning and quantifying power-law behavior in empirical data. Our approach combines maximum-likelihood fitting methods with goodness-of-fit tests based on the Kolmogorov–Smirnov (KS) statistic and likelihood ratios. We evaluate the effectiveness of the approach with tests on synthetic data and give critical comparisons to previous approaches. We also apply the proposed methods to twenty-four real-world data sets from a range of different disciplines, each of which has been conjectured to follow a power-law distribution. In some cases we find these conjectures to be consistent with the data, while in others the power law is ruled out.}, author = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.}, doi = {10.1137/070710111}, interhash = {9ce8658af5a6358a758bfdb819f73394}, intrahash = {c0097d202655474b1db6811ddea03410}, issn = {0036-1445}, journal = {SIAM Review}, number = 4, pages = {661--703}, publisher = {SIAM}, title = {Power-Law Distributions in Empirical Data}, url = {http://link.aip.org/link/?SIR/51/661/1}, volume = 51, year = 2009 } @misc{newman2004power, abstract = {When the probability of measuring a particular value of some quantity varies inversely as a power of that value, the quantity is said to follow a power law, also known variously as Zipf's law or the Pareto distribution. Power laws appear widely in physics, biology, earth and planetary sciences, economics and finance, computer science, demography and the social sciences. For instance, the distributions of the sizes of cities, earthquakes, solar flares, moon craters, wars and people's personal fortunes all appear to follow power laws. The origin of power-law behaviour has been a topic of debate in the scientific community for more than a century. Here we review some of the empirical evidence for the existence of power-law forms and the theories proposed to explain them. }, author = {Newman, M. E. J.}, interhash = {0e71ef0a12837211faa22d9f16eda4a8}, intrahash = {561772806731f6afcdc0c707e34662dd}, title = {Power laws, Pareto distributions and Zipf's law}, url = {http://arxiv.org/abs/cond-mat/0412004}, year = 2004 } @misc{newman2004power, abstract = {When the probability of measuring a particular value of some quantity varies inversely as a power of that value, the quantity is said to follow a power law, also known variously as Zipf's law or the Pareto distribution. Power laws appear widely in physics, biology, earth and planetary sciences, economics and finance, computer science, demography and the social sciences. For instance, the distributions of the sizes of cities, earthquakes, solar flares, moon craters, wars and people's personal fortunes all appear to follow power laws. The origin of power-law behaviour has been a topic of debate in the scientific community for more than a century. Here we review some of the empirical evidence for the existence of power-law forms and the theories proposed to explain them. }, author = {Newman, M. E. J.}, interhash = {0e71ef0a12837211faa22d9f16eda4a8}, intrahash = {561772806731f6afcdc0c707e34662dd}, title = {Power laws, Pareto distributions and Zipf's law}, url = {http://arxiv.org/abs/cond-mat/0412004}, year = 2004 } @article{newman2006modularity, abstract = {Many networks of interest in the sciences, including social networks, computer networks, and metabolic and regulatory networks, are found to divide naturally into communities or modules. The problem of detecting and characterizing this community structure is one of the outstanding issues in the study of networked systems. One highly effective approach is the optimization of the quality function known as “modularity” over the possible divisions of a network. Here I show that the modularity can be expressed in terms of the eigenvectors of a characteristic matrix for the network, which I call the modularity matrix, and that this expression leads to a spectral algorithm for community detection that returns results of demonstrably higher quality than competing methods in shorter running times. I illustrate the method with applications to several published network data sets.}, author = {Newman, M. E. J.}, doi = {10.1073/pnas.0601602103}, interhash = {e664336d414a1e21d89f30cc56f5e739}, intrahash = {5dd9d0c2155f242393e63547d8a2347f}, journal = {Proceedings of the National Academy of Sciences}, number = 23, pages = {8577--8582}, title = {Modularity and community structure in networks}, volume = 103, year = 2006 } @article{clauset2009powerlaw, abstract = {Power-law distributions occur in many situations of scientific interest and have significant consequences for our understanding of natural and man-made phenomena. Unfortunately, the detection and characterization of power laws is complicated by the large fluctuations that occur in the tail of the distribution—the part of the distribution representing large but rare events—and by the difficulty of identifying the range over which power-law behavior holds. Commonly used methods for analyzing power-law data, such as least-squares fitting, can produce substantially inaccurate estimates of parameters for power-law distributions, and even in cases where such methods return accurate answers they are still unsatisfactory because they give no indication of whether the data obey a power law at all. Here we present a principled statistical framework for discerning and quantifying power-law behavior in empirical data. Our approach combines maximum-likelihood fitting methods with goodness-of-fit tests based on the Kolmogorov–Smirnov (KS) statistic and likelihood ratios. We evaluate the effectiveness of the approach with tests on synthetic data and give critical comparisons to previous approaches. We also apply the proposed methods to twenty-four real-world data sets from a range of different disciplines, each of which has been conjectured to follow a power-law distribution. In some cases we find these conjectures to be consistent with the data, while in others the power law is ruled out.}, author = {Clauset, Aaron and Shalizi, Cosma Rohilla and Newman, M. E. J.}, doi = {10.1137/070710111}, interhash = {9ce8658af5a6358a758bfdb819f73394}, intrahash = {c0097d202655474b1db6811ddea03410}, issn = {0036-1445}, journal = {SIAM Review}, number = 4, pages = {661--703}, publisher = {SIAM}, title = {Power-Law Distributions in Empirical Data}, url = {http://link.aip.org/link/?SIR/51/661/1}, volume = 51, year = 2009 } @article{newman2001structure, abstract = {The structure of scientific collaboration networks is investigated. Two scientists are considered connected if they have authored a paper together and explicit networks of such connections are constructed by using data drawn from a number of databases, including MEDLINE biomedical research, the Los Alamos e-Print Archive physics, and NCSTRL computer science. I show that these collaboration networks form ” small worlds,” in which randomly chosen pairs of scientists are typically separated by only a short path of intermediate acquaintances. I further give results for mean and distribution of numbers of collaborators of authors, demonstrate the presence of clustering in the networks, and highlight a number of apparent differences in the patterns of collaboration between the fields studied.}, author = {Newman, M. E. J.}, doi = {10.1073/pnas.98.2.404}, eprint = {http://www.pnas.org/content/98/2/404.full.pdf+html}, interhash = {8c5edd915b304ae09fc08e0a51dfd5e9}, intrahash = {a4d3149c7198762a99102935da4d1bdb}, journal = {Proceedings of the National Academy of Sciences}, number = 2, pages = {404--409}, title = {The structure of scientific collaboration networks}, url = {http://www.pnas.org/content/98/2/404.abstract}, volume = 98, year = 2001 } @article{Leicht08community, author = {Leicht, E. A. and Newman, M. E. J.}, doi = {10.1103/PhysRevLett.100.118703}, interhash = {825411a28bde71cda1c9087fc329d963}, intrahash = {93726cc0540f75ee1cb515b2923d69e8}, journal = {Phys. Rev. Lett.}, month = mar, number = 11, numpages = {4}, pages = 118703, publisher = {American Physical Society}, title = {Community Structure in Directed Networks}, volume = 100, year = 2008 } @misc{ghoshal2009random, abstract = { In the last few years we have witnessed the emergence, primarily in on-linecommunities, of new types of social networks that require for theirrepresentation more complex graph structures than have been employed in thepast. One example is the folksonomy, a tripartite structure of users,resources, and tags -- labels collaboratively applied by the users to theresources in order to impart meaningful structure on an otherwiseundifferentiated database. Here we propose a mathematical model of suchtripartite structures which represents them as random hypergraphs. We show thatit is possible to calculate many properties of this model exactly in the limitof large network size and we compare the results against observations of a realfolksonomy, that of the on-line photography web site Flickr. We show that insome cases the model matches the properties of the observed network well, whilein others there are significant differences, which we find to be attributableto the practice of multiple tagging, i.e., the application by a single user ofmany tags to one resource, or one tag to many resources.}, author = {Ghoshal, Gourab and Zlatic, Vinko and Caldarelli, Guido and Newman, M. E. J.}, interhash = {06e785ad79729e23e326b9c572aa7c56}, intrahash = {a1533c3b12096f71a2b6b6970eb9934d}, note = {cite arxiv:0903.0419Comment: 11 pages, 7 figures}, title = {Random hypergraphs and their applications}, url = {http://arxiv.org/abs/0903.0419}, year = 2009 } @article{newman2004finding, abstract = {We propose and study a set of algorithms for discovering community structure in networks-natural divisions of network nodes into densely connected subgroups. Our algorithms all share two definitive features: first, they involve iterative removal of edges from the network to split it into communities, the edges removed being identified using any one of a number of possible "betweenness" measures, and second, these measures are, crucially, recalculated after each removal. We also propose a measure for the strength of the community structure found by our algorithms, which gives us an objective metric for choosing the number of communities into which a network should be divided. We demonstrate that our algorithms are highly effective at discovering community structure in both computer-generated and real-world network data, and show how they can be used to shed light on the sometimes dauntingly complex structure of networked systems.}, author = {Newman, M E and Girvan, M}, interhash = {b9145040e35ccb4d2a0ce18105e64ff4}, intrahash = {0c522f0a01f72638e70916f1144746e6}, journal = {Phys Rev E Stat Nonlin Soft Matter Phys}, month = Feb, number = 2, pages = {026113.1-15}, pmid = {14995526}, title = {Finding and evaluating community structure in networks}, url = {http://www.ncbi.nlm.nih.gov/pubmed/14995526}, volume = 69, year = 2004 } @misc{newman2003structure, abstract = {Inspired by empirical studies of networked systems such as the Internet, social networks, and biological networks, researchers have in recent years developed a variety of techniques and models to help us understand or predict the behavior of these systems. Here we review developments in this field, including such concepts as the small-world effect, degree distributions, clustering, network correlations, random graph models, models of network growth and preferential attachment, and dynamical processes taking place on networks.}, author = {Newman, M. E. J.}, file = {newman2003structure.pdf:newman2003structure.pdf:PDF}, interhash = {7bedd01cb4c06af9f5200b0fb3faa571}, intrahash = {d53568209eef08fb0a8734cf34c59a71}, lastdatemodified = {2006-10-07}, lastname = {Newman}, month = {March}, own = {notown}, pdf = {newman03-structure.pdf}, read = {notread}, title = {The structure and function of complex networks}, url = {http://arxiv.org/abs/cond-mat/0303516}, year = 2003 } @article{PhysRevLett.100.118703, author = {Leicht, E. A. and Newman, M. E. J.}, doi = {10.1103/PhysRevLett.100.118703}, interhash = {825411a28bde71cda1c9087fc329d963}, intrahash = {93726cc0540f75ee1cb515b2923d69e8}, journal = {Phys. Rev. Lett.}, month = mar, number = 11, numpages = {4}, pages = 118703, publisher = {American Physical Society}, title = {Community Structure in Directed Networks}, url = {http://prl.aps.org/abstract/PRL/v100/i11/e118703}, volume = 100, year = 2008 } @article{Newman04communityStructure, abstract = {We propose and study a set of algorithms for discovering community structure in networks-natural divisions of network nodes into densely connected subgroups. Our algorithms all share two definitive features: first, they involve iterative removal of edges from the network to split it into communities, the edges removed being identified using any one of a number of possible "betweenness" measures, and second, these measures are, crucially, recalculated after each removal. We also propose a measure for the strength of the community structure found by our algorithms, which gives us an objective metric for choosing the number of communities into which a network should be divided. We demonstrate that our algorithms are highly effective at discovering community structure in both computer-generated and real-world network data, and show how they can be used to shed light on the sometimes dauntingly complex structure of networked systems.}, author = {Newman, M E and Girvan, M}, interhash = {b9145040e35ccb4d2a0ce18105e64ff4}, intrahash = {0c522f0a01f72638e70916f1144746e6}, journal = {Phys Rev E Stat Nonlin Soft Matter Phys}, month = feb, number = 2, pages = {026113.1-15}, pmid = {14995526}, title = {Finding and evaluating community structure in networks}, url = {http://www.ncbi.nlm.nih.gov/pubmed/14995526}, volume = 69, year = 2004 } @article{New03, author = {Newman, M. E. J.}, interhash = {7bedd01cb4c06af9f5200b0fb3faa571}, intrahash = {f0de28071b8ee1c3675e67c7538e806a}, journal = {SIAM Review}, number = 2, pages = {167-256}, title = {The structure and function of complex networks}, volume = 45, year = 2003 }