@article{newman2006modularity, abstract = {Many networks of interest in the sciences, including social networks, computer networks, and metabolic and regulatory networks, are found to divide naturally into communities or modules. The problem of detecting and characterizing this community structure is one of the outstanding issues in the study of networked systems. One highly effective approach is the optimization of the quality function known as “modularity” over the possible divisions of a network. Here I show that the modularity can be expressed in terms of the eigenvectors of a characteristic matrix for the network, which I call the modularity matrix, and that this expression leads to a spectral algorithm for community detection that returns results of demonstrably higher quality than competing methods in shorter running times. I illustrate the method with applications to several published network data sets.}, author = {Newman, M. E. J.}, doi = {10.1073/pnas.0601602103}, interhash = {e664336d414a1e21d89f30cc56f5e739}, intrahash = {5dd9d0c2155f242393e63547d8a2347f}, journal = {Proceedings of the National Academy of Sciences}, number = 23, pages = {8577--8582}, title = {Modularity and community structure in networks}, volume = 103, year = 2006 } @article{Leicht08community, author = {Leicht, E. A. and Newman, M. E. J.}, doi = {10.1103/PhysRevLett.100.118703}, interhash = {825411a28bde71cda1c9087fc329d963}, intrahash = {93726cc0540f75ee1cb515b2923d69e8}, journal = {Phys. Rev. Lett.}, month = mar, number = 11, numpages = {4}, pages = 118703, publisher = {American Physical Society}, title = {Community Structure in Directed Networks}, volume = 100, year = 2008 } @inproceedings{conf/icdm/TangWL09, author = {Tang, Lei and Wang, Xufei and Liu, Huan}, booktitle = {ICDM}, crossref = {conf/icdm/2009}, date = {2010-01-27}, editor = {Wang, Wei and Kargupta, Hillol and Ranka, Sanjay and Yu, Philip S. and Wu, Xindong}, ee = {http://doi.ieeecomputersociety.org/10.1109/ICDM.2009.20}, interhash = {e689d9e29e78d2c869896121ad37a772}, intrahash = {e54ea4c1636d8589d7a7d119291cb1ea}, isbn = {978-0-7695-3895-2}, pages = {503-512}, publisher = {IEEE Computer Society}, title = {Uncoverning Groups via Heterogeneous Interaction Analysis.}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.150.7384&rep=rep1&type=pdf}, year = 2009 } @inproceedings{Detecting_Commmunities_via_Simultaneous_Clustering_of_Graphs_and_Folksonomies, author = {Java, Akshay and Joshi, Anupam and Finin, Tim}, booktitle = {WebKDD 2008 Workshop on Web Mining and Web Usage Analysis}, interhash = {acfec953843b168e61e2e167e29b4c3d}, intrahash = {645abd6b3191a2a6e844d7542651ed1c}, month = {August}, note = {To Appear}, title = {Detecting Commmunities via Simultaneous Clustering of Graphs and Folksonomies}, year = 2008 } @inproceedings{conf/kdd/ChiSZHT07, author = {Chi, Yun and Song, Xiaodan and Zhou, Dengyong and Hino, Koji and Tseng, Belle L.}, booktitle = {KDD}, crossref = {conf/kdd/2007}, date = {2007-08-23}, editor = {Berkhin, Pavel and Caruana, Rich and Wu, Xindong}, ee = {http://doi.acm.org/10.1145/1281192.1281212}, interhash = {542ce3968b0d75048000f35669a7fb83}, intrahash = {0829ef077986e88540a96bd8ba154d86}, isbn = {978-1-59593-609-7}, pages = {153-162}, publisher = {ACM}, title = {Evolutionary spectral clustering by incorporating temporal smoothness.}, url = {http://dblp.uni-trier.de/db/conf/kdd/kdd2007.html#ChiSZHT07}, year = 2007 } @inproceedings{conf/icml/WagstaffCRS01, author = {Wagstaff, Kiri and Cardie, Claire and Rogers, Seth and Schrödl, Stefan}, booktitle = {ICML}, crossref = {conf/icml/2001}, date = {2002-11-27}, editor = {Brodley, Carla E. and Danyluk, Andrea Pohoreckyj}, interhash = {10d8a7c9e5b5f9cf0d0848cf8c10f604}, intrahash = {c0c3565625192ee6748b52d9d4f3b526}, isbn = {1-55860-778-1}, pages = {577-584}, publisher = {Morgan Kaufmann}, title = {Constrained K-means Clustering with Background Knowledge.}, url = {http://dblp.uni-trier.de/db/conf/icml/icml2001.html#WagstaffCRS01}, year = 2001 } @inproceedings{conf/cvpr/ShiM97, author = {Shi, Jianbo and Malik, Jitendra}, booktitle = {CVPR}, ee = {http://computer.org/proceedings/cvpr/7822/78220731abs.htm}, interhash = {600345c3af56da066873a30c9971a615}, intrahash = {bc4607ac2084911e4b1ba23b323f649a}, pages = {731-737}, title = {Normalized Cuts and Image Segmentation.}, url = {http://dblp.uni-trier.de/db/conf/cvpr/cvpr1997.html#ShiM97}, year = 1997 } @inproceedings{grahl07conceptualKdml, author = {Grahl, Miranda and Hotho, Andreas and Stumme, Gerd}, booktitle = {Workshop Proceedings of Lernen - Wissensentdeckung - Adaptivität (LWA 2007)}, editor = {Hinneburg, Alexander}, interhash = {9c3bb05456bf11bcd88a1135de51f7d9}, intrahash = {6d5188d66564fe4ed7386e28868504de}, isbn = {978-3-86010-907-6}, month = sep, pages = {50-54}, publisher = {Martin-Luther-Universität Halle-Wittenberg}, title = {Conceptual Clustering of Social Bookmark Sites}, url = {http://www.tagora-project.eu/wp-content/2007/06/grahl_iknow07.pdf }, vgwort = {14}, year = 2007 } @inproceedings{Begelman2006, address = {Edinburgh}, author = {Begelman, Grigory and Keller, Philipp and Smadja, Frank}, booktitle = {Proceedings of the WWW 2006 Workshop on Collaborative Web Tagging Workshop}, interhash = {ffacd9d40f6cba1aa8140f501c2a1802}, intrahash = {95449b3d4b12e8930d529e1e22d51e04}, month = may, pdf = {http://www.rawsugar.com/www2006/20.pdf}, timestamp = {2007.04.11}, title = {Automated Tag Clustering: Improving search and exploration in the tag space}, url = {http://www.rawsugar.com/www2006/taggingworkshopschedule.html}, year = 2006 } @article{cattuto2007, author = {Cattuto, C. and Schmitz, C. and Baldassarri, A. and Servedio, V. D. P. and Loreto, V. and Hotho, A. and Grahl, M. and Stumme, G.}, interhash = {fc5f2df61d28bc99b7e15029da125588}, intrahash = {d87e198a6d564ae8a8fe151e0a96fa0f}, journal = {AI Communications}, number = 4, pages = {245 - 262}, title = {Network Properties of Folksonomies}, url = {http://www.kde.cs.uni-kassel.de/hotho/pub/2007/aicomm_2007_folksonomy_clustering.pdf}, vgwort = {67}, volume = 20, year = 2007 } @article{partitioning89, author = {Pothen, A. and Simon, H.D. and Liou, K.P.}, interhash = {2f6596b5093c7d16e3817dadf39b8aa2}, intrahash = {7444914fc73fc8ec12a67e5e172c34c0}, journal = {SIAM J. MATRIX ANAL. APPLIC.}, number = 3, pages = {430--452}, title = {{Partitioning Sparse Matrices with Eigenvectors of Graphs}}, url = {http://ntrs.nasa.gov/archive/nasa/casi.ntrs.nasa.gov/19970011963_1997016998.pdf }, volume = 11, year = 1990 } @techreport{Spielman:1996, address = {Berkeley, CA, USA}, author = {Spielman, Daniel A. and Teng, Shang}, interhash = {83f3d15605beda920551830ccac3d79a}, intrahash = {06b1b19e0a29a145555cb1526716c451}, publisher = {University of California at Berkeley}, title = {Spectral Partitioning Works: Planar Graphs and Finite Element Meshes}, year = 1996 } @article{donath1973lbp, author = {Donath, W.E. and Hoffman, A.J.}, interhash = {ff38bdeb46caa114a3efad739319973f}, intrahash = {7cb789bd22dfa8ccdd2abdd30121dfc9}, journal = {IBM Journal of Research and Development}, number = 5, pages = {420--425}, title = {{Lower bounds for the partitioning of graphs}}, volume = 17, year = 1973 } @inproceedings{Ng01onspectral, abstract = {Despite many empirical successes of spectral clustering methods| algorithms that cluster points using eigenvectors of matrices derived from the data|there are several unresolved issues. First, there are a wide variety of algorithms that use the eigenvectors in slightly dierent ways. Second, many of these algorithms have no proof that they will actually compute a reasonable clustering. In this paper, we present a simple spectral clustering algorithm that can be implemented using a few lines of Matlab. Using tools from matrix perturbation theory, we analyze the algorithm, and give conditions under which it can be expected to do well. We also show surprisingly good experimental results on a number of challenging clustering problems. 1}, author = {Ng, Andrew Y. and Jordan, Michael I. and Weiss, Yair}, booktitle = {Advances in Neural Information Processing Systems 14}, interhash = {b72c97e659127fc653a0d51143d85b0c}, intrahash = {7485849e42418ee5ceefb45dc6eb603c}, pages = {849--856}, publisher = {MIT Press}, title = {On spectral clustering: Analysis and an algorithm}, year = 2001 } @inproceedings{1066236, abstract = {In this paper we introduce a novel algorithm called TRICLUSTER, for mining coherent clusters in three-dimensional (3D) gene expression datasets. TRICLUSTER can mine arbitrarily positioned and overlapping clusters, and depending on different parameter values, it can mine different types of clusters, including those with constant or similar values along each dimension, as well as scaling and shifting expression patterns. TRICLUSTER relies on graph-based approach to mine all valid clusters. For each time slice, i.e., a gene×sample matrix, it constructs the range multigraph, a compact representation of all similar value ranges between any two sample columns. It then searches for constrained maximal cliques in this multigraph to yield the set of bi-clusters for this time slice. Then TRICLUSTER constructs another graph using the biclusters (as vertices) from each time slice; mining cliques from this graph yields the final set of triclusters. Optionally, TRICLUSTER merges/deletes some clusters having large overlaps. We present a useful set of metrics to evaluate the clustering quality, and we show that TRICLUSTER can find significant triclusters in the real microarray datasets.}, address = {New York, NY, USA}, author = {Zhao, Lizhuang and Zaki, Mohammed J.}, booktitle = {SIGMOD '05: Proceedings of the 2005 ACM SIGMOD international conference on Management of data}, doi = {http://doi.acm.org/10.1145/1066157.1066236}, interhash = {a4e66b4d48599fe17da1a0be9da4859d}, intrahash = {f99143dfa553745fb2e7d7f96a8b4bb7}, isbn = {1-59593-060-4}, location = {Baltimore, Maryland}, pages = {694--705}, publisher = {ACM}, title = {TRICLUSTER: an effective algorithm for mining coherent clusters in 3D microarray data}, url = {http://portal.acm.org/citation.cfm?id=1066157.1066236}, year = 2005 } @article{Boley97principaldirection, abstract = {We propose a new algorithm capable of partitioning a set of documents or other samples based on an embedding in a high dimensional Euclidean space (i.e. in which every document is a vector of real numbers). The method is unusual in that it is divisive, as opposed to agglomerative, and operates by repeatedly splitting clusters into smaller clusters. The splits are not based on any distance or similarity measure. The documents are assembled in to a matrix which is very sparse. It is this sparsity that permits the algorithm to be very efficient. The performance of the method is illustrated with a set of text documents obtained from the World Wide Web. Some possible extensions are proposed for further investigation.}, author = {Boley, Daniel}, interhash = {281afd06bd3e21ec3ef212da4ec18ee0}, intrahash = {bca740460f14035af773f665887b6fa4}, journal = {Data Mining and Knowledge Discovery}, pages = {325--344}, title = {Principal Direction Divisive Partitioning}, volume = 2, year = 1997 } @inproceedings{Approximating2008Java, abstract = {In many social media applications, a small fraction of the members are highly linked while most are sparsely connected to the network. Such a skewed distribution is sometimes referred to as the"long tail". Popular applications like meme trackers and content aggregators mine for information from only the popular blogs located at the head of this curve. On the other hand, the long tail contains large volumes of interesting information and niches. The question we address in this work is how best to approximate the community membership of entities in the long tail using only a small percentage of the entire graph structure. Our technique utilizes basic linear algebra manipulations and spectral methods. It has the advantage of quickly and efficiently finding a reasonable approximation of the community structure of the overall network. Such a method has significant applications in blog analysis engines as well as social media monitoring tools in general. }, author = {Java, Akshay and Joshi, Anupam and FininBook, Tim}, booktitle = {Proceedings of the Second International Conference on Weblogs and Social Media(ICWSM 2008)}, date = {2008 Abstract:}, interhash = {ede357e110fee8803dc181d262f30087}, intrahash = {386f36679c111f30e37ced272d5b355c}, publisher = {AAAI Press}, title = {Approximating the Community Structure of the Long Tail}, url = {http://ebiquity.umbc.edu/paper/html/id/381/Approximating-the-Community-Structure-of-the-Long-Tail}, year = 2008 } @article{papa2007hpa, author = {Papa, D.A. and Markov, I.L.}, interhash = {68b9025134cf6e3a4fb2fe8c3194e2b4}, intrahash = {1c9083525938ab35d2a35df850612cb1}, journal = {Handbook of Approximation Algorithms and Metaheuristics}, publisher = {Chapman \& Hall/CRC}, title = {{Hypergraph partitioning and clustering}}, year = 2007 } @misc{Vazquez2008, abstract = { Data clustering, including problems such as finding network communities, can be put into a systematic framework by means of a Bayesian approach. The application of Bayesian approaches to real problems can be, however, quite challenging. In most cases the solution is explored via Monte Carlo sampling or variational methods. Here we work further on the application of variational methods to clustering problems. We introduce generative models based on a hidden group structure and prior distributions. We extend previous attends by Jaynes, and derive the prior distributions based on symmetry arguments. As a case study we address the problems of two-sides clustering real value data and clustering data represented by a hypergraph or bipartite graph. From the variational calculations, and depending on the starting statistical model for the data, we derive a variational Bayes algorithm, a generalized version of the expectation maximization algorithm with a built in penalization for model complexity or bias. We demonstrate the good performance of the variational Bayes algorithm using test examples. }, author = {Vazquez, Alexei}, interhash = {ee1f9455db7046612d0baf0360e0f428}, intrahash = {887ae82953a03602e0a135d303950b80}, note = {cite arxiv:0805.2689 Comment: 12 pages, 5 figures. New sections added}, title = {Bayesian approach to clustering real value, categorical and network data: solution via variational methods}, url = {http://arxiv.org/abs/0805.2689}, year = 2008 } @inproceedings{conf/nips/ZhouHS06, author = {Zhou, Dengyong and Huang, Jiayuan and Schölkopf, Bernhard}, booktitle = {NIPS}, crossref = {conf/nips/2006}, date = {2007-10-25}, editor = {Schölkopf, Bernhard and Platt, John and Hoffman, Thomas}, ee = {http://books.nips.cc/papers/files/nips19/NIPS2006_0630.pdf}, interhash = {417da4dd6977c69b207334f358fdee49}, intrahash = {0c070304670432054ed74e1c34406b18}, isbn = {0-262-19568-2}, pages = {1601-1608}, publisher = {MIT Press}, title = {Learning with Hypergraphs: Clustering, Classification, and Embedding.}, url = {http://dblp.uni-trier.de/db/conf/nips/nips2006.html#ZhouHS06}, year = 2006 }