@inproceedings{conf/cvpr/ShiM97, author = {Shi, Jianbo and Malik, Jitendra}, booktitle = {CVPR}, ee = {http://computer.org/proceedings/cvpr/7822/78220731abs.htm}, interhash = {600345c3af56da066873a30c9971a615}, intrahash = {bc4607ac2084911e4b1ba23b323f649a}, pages = {731-737}, title = {Normalized Cuts and Image Segmentation.}, url = {http://dblp.uni-trier.de/db/conf/cvpr/cvpr1997.html#ShiM97}, year = 1997 } @inproceedings{grahl07conceptualKdml, author = {Grahl, Miranda and Hotho, Andreas and Stumme, Gerd}, booktitle = {Workshop Proceedings of Lernen - Wissensentdeckung - Adaptivität (LWA 2007)}, editor = {Hinneburg, Alexander}, interhash = {9c3bb05456bf11bcd88a1135de51f7d9}, intrahash = {6d5188d66564fe4ed7386e28868504de}, isbn = {978-3-86010-907-6}, month = sep, pages = {50-54}, publisher = {Martin-Luther-Universität Halle-Wittenberg}, title = {Conceptual Clustering of Social Bookmark Sites}, url = {http://www.tagora-project.eu/wp-content/2007/06/grahl_iknow07.pdf }, vgwort = {14}, year = 2007 } @inproceedings{Begelman2006, address = {Edinburgh}, author = {Begelman, Grigory and Keller, Philipp and Smadja, Frank}, booktitle = {Proceedings of the WWW 2006 Workshop on Collaborative Web Tagging Workshop}, interhash = {ffacd9d40f6cba1aa8140f501c2a1802}, intrahash = {95449b3d4b12e8930d529e1e22d51e04}, month = may, pdf = {http://www.rawsugar.com/www2006/20.pdf}, timestamp = {2007.04.11}, title = {Automated Tag Clustering: Improving search and exploration in the tag space}, url = {http://www.rawsugar.com/www2006/taggingworkshopschedule.html}, year = 2006 } @techreport{Spielman:1996, address = {Berkeley, CA, USA}, author = {Spielman, Daniel A. and Teng, Shang}, interhash = {83f3d15605beda920551830ccac3d79a}, intrahash = {06b1b19e0a29a145555cb1526716c451}, publisher = {University of California at Berkeley}, title = {Spectral Partitioning Works: Planar Graphs and Finite Element Meshes}, year = 1996 } @article{donath1973lbp, author = {Donath, W.E. and Hoffman, A.J.}, interhash = {ff38bdeb46caa114a3efad739319973f}, intrahash = {7cb789bd22dfa8ccdd2abdd30121dfc9}, journal = {IBM Journal of Research and Development}, number = 5, pages = {420--425}, title = {{Lower bounds for the partitioning of graphs}}, volume = 17, year = 1973 } @inproceedings{Ng01onspectral, abstract = {Despite many empirical successes of spectral clustering methods| algorithms that cluster points using eigenvectors of matrices derived from the data|there are several unresolved issues. First, there are a wide variety of algorithms that use the eigenvectors in slightly dierent ways. Second, many of these algorithms have no proof that they will actually compute a reasonable clustering. In this paper, we present a simple spectral clustering algorithm that can be implemented using a few lines of Matlab. Using tools from matrix perturbation theory, we analyze the algorithm, and give conditions under which it can be expected to do well. We also show surprisingly good experimental results on a number of challenging clustering problems. 1}, author = {Ng, Andrew Y. and Jordan, Michael I. and Weiss, Yair}, booktitle = {Advances in Neural Information Processing Systems 14}, interhash = {b72c97e659127fc653a0d51143d85b0c}, intrahash = {7485849e42418ee5ceefb45dc6eb603c}, pages = {849--856}, publisher = {MIT Press}, title = {On spectral clustering: Analysis and an algorithm}, year = 2001 } @inproceedings{1066236, abstract = {In this paper we introduce a novel algorithm called TRICLUSTER, for mining coherent clusters in three-dimensional (3D) gene expression datasets. TRICLUSTER can mine arbitrarily positioned and overlapping clusters, and depending on different parameter values, it can mine different types of clusters, including those with constant or similar values along each dimension, as well as scaling and shifting expression patterns. TRICLUSTER relies on graph-based approach to mine all valid clusters. For each time slice, i.e., a gene×sample matrix, it constructs the range multigraph, a compact representation of all similar value ranges between any two sample columns. It then searches for constrained maximal cliques in this multigraph to yield the set of bi-clusters for this time slice. Then TRICLUSTER constructs another graph using the biclusters (as vertices) from each time slice; mining cliques from this graph yields the final set of triclusters. Optionally, TRICLUSTER merges/deletes some clusters having large overlaps. We present a useful set of metrics to evaluate the clustering quality, and we show that TRICLUSTER can find significant triclusters in the real microarray datasets.}, address = {New York, NY, USA}, author = {Zhao, Lizhuang and Zaki, Mohammed J.}, booktitle = {SIGMOD '05: Proceedings of the 2005 ACM SIGMOD international conference on Management of data}, doi = {http://doi.acm.org/10.1145/1066157.1066236}, interhash = {a4e66b4d48599fe17da1a0be9da4859d}, intrahash = {f99143dfa553745fb2e7d7f96a8b4bb7}, isbn = {1-59593-060-4}, location = {Baltimore, Maryland}, pages = {694--705}, publisher = {ACM}, title = {TRICLUSTER: an effective algorithm for mining coherent clusters in 3D microarray data}, url = {http://portal.acm.org/citation.cfm?id=1066157.1066236}, year = 2005 } @article{Boley97principaldirection, abstract = {We propose a new algorithm capable of partitioning a set of documents or other samples based on an embedding in a high dimensional Euclidean space (i.e. in which every document is a vector of real numbers). The method is unusual in that it is divisive, as opposed to agglomerative, and operates by repeatedly splitting clusters into smaller clusters. The splits are not based on any distance or similarity measure. The documents are assembled in to a matrix which is very sparse. It is this sparsity that permits the algorithm to be very efficient. The performance of the method is illustrated with a set of text documents obtained from the World Wide Web. Some possible extensions are proposed for further investigation.}, author = {Boley, Daniel}, interhash = {281afd06bd3e21ec3ef212da4ec18ee0}, intrahash = {bca740460f14035af773f665887b6fa4}, journal = {Data Mining and Knowledge Discovery}, pages = {325--344}, title = {Principal Direction Divisive Partitioning}, volume = 2, year = 1997 } @inproceedings{Approximating2008Java, abstract = {In many social media applications, a small fraction of the members are highly linked while most are sparsely connected to the network. Such a skewed distribution is sometimes referred to as the"long tail". Popular applications like meme trackers and content aggregators mine for information from only the popular blogs located at the head of this curve. On the other hand, the long tail contains large volumes of interesting information and niches. The question we address in this work is how best to approximate the community membership of entities in the long tail using only a small percentage of the entire graph structure. Our technique utilizes basic linear algebra manipulations and spectral methods. It has the advantage of quickly and efficiently finding a reasonable approximation of the community structure of the overall network. Such a method has significant applications in blog analysis engines as well as social media monitoring tools in general. }, author = {Java, Akshay and Joshi, Anupam and FininBook, Tim}, booktitle = {Proceedings of the Second International Conference on Weblogs and Social Media(ICWSM 2008)}, date = {2008 Abstract:}, interhash = {ede357e110fee8803dc181d262f30087}, intrahash = {386f36679c111f30e37ced272d5b355c}, publisher = {AAAI Press}, title = {Approximating the Community Structure of the Long Tail}, url = {http://ebiquity.umbc.edu/paper/html/id/381/Approximating-the-Community-Structure-of-the-Long-Tail}, year = 2008 } @article{papa2007hpa, author = {Papa, D.A. and Markov, I.L.}, interhash = {68b9025134cf6e3a4fb2fe8c3194e2b4}, intrahash = {1c9083525938ab35d2a35df850612cb1}, journal = {Handbook of Approximation Algorithms and Metaheuristics}, publisher = {Chapman \& Hall/CRC}, title = {{Hypergraph partitioning and clustering}}, year = 2007 } @misc{Vazquez2008, abstract = { Data clustering, including problems such as finding network communities, can be put into a systematic framework by means of a Bayesian approach. The application of Bayesian approaches to real problems can be, however, quite challenging. In most cases the solution is explored via Monte Carlo sampling or variational methods. Here we work further on the application of variational methods to clustering problems. We introduce generative models based on a hidden group structure and prior distributions. We extend previous attends by Jaynes, and derive the prior distributions based on symmetry arguments. As a case study we address the problems of two-sides clustering real value data and clustering data represented by a hypergraph or bipartite graph. From the variational calculations, and depending on the starting statistical model for the data, we derive a variational Bayes algorithm, a generalized version of the expectation maximization algorithm with a built in penalization for model complexity or bias. We demonstrate the good performance of the variational Bayes algorithm using test examples. }, author = {Vazquez, Alexei}, interhash = {ee1f9455db7046612d0baf0360e0f428}, intrahash = {887ae82953a03602e0a135d303950b80}, note = {cite arxiv:0805.2689 Comment: 12 pages, 5 figures. New sections added}, title = {Bayesian approach to clustering real value, categorical and network data: solution via variational methods}, url = {http://arxiv.org/abs/0805.2689}, year = 2008 } @inproceedings{conf/nips/ZhouHS06, author = {Zhou, Dengyong and Huang, Jiayuan and Schölkopf, Bernhard}, booktitle = {NIPS}, crossref = {conf/nips/2006}, date = {2007-10-25}, editor = {Schölkopf, Bernhard and Platt, John and Hoffman, Thomas}, ee = {http://books.nips.cc/papers/files/nips19/NIPS2006_0630.pdf}, interhash = {417da4dd6977c69b207334f358fdee49}, intrahash = {0c070304670432054ed74e1c34406b18}, isbn = {0-262-19568-2}, pages = {1601-1608}, publisher = {MIT Press}, title = {Learning with Hypergraphs: Clustering, Classification, and Embedding.}, url = {http://dblp.uni-trier.de/db/conf/nips/nips2006.html#ZhouHS06}, year = 2006 } @article{karypis1997mhp, author = {Karypis, G. and Aggarwal, R. and Kumar, V. and Shekhar, S.}, booktitle = {Proceedings of the 34th annual conference on Design automation}, interhash = {77aa28416ddb1c22834224cf0b09c74e}, intrahash = {bdfb6003e7a8786b0a5649bb250c0a77}, organization = {ACM New York, NY, USA}, pages = {526--529}, title = {{Multilevel hypergraph partitioning: Application in VLSI domain}}, year = 1997 } @article{flake2004graph, author = {Flake, G.W. and Tarjan, R.E. and Tsioutsiouliklis, K.}, interhash = {c6e80a1ce6ca8013e94aa67a27e1fa92}, intrahash = {6cb5211e40c6ff08605f69b133326a0b}, journal = {Internet Mathematics}, number = 4, pages = {385--408}, publisher = {AK Peters}, title = {{Graph clustering and minimum cut trees}}, url = {http://scholar.google.de/scholar.bib?q=info:27hvFfjDdrkJ:scholar.google.com/&output=citation&hl=de&ct=citation&cd=6}, volume = 1, year = 2004 } @article{cantador:bes, author = {Cantador, I. and Castells, P.}, interhash = {876722af69956aa42f7cb1260b456a5a}, intrahash = {eeb2ea8c8f39f6057465b38eea991582}, title = {{Building Emergent Social Networks and Group Profiles by Semantic User Preference Clustering}}, year = 2006 } @article{li2008tag, author = {Li, X. and Guo, L. and Zhao, Y.E.}, interhash = {d7e6a5b8d215682b2a75add69c01de29}, intrahash = {28e7f96eca814c0453ae57c863d1ecfb}, publisher = {ACM New York, NY, USA}, title = {{Tag-based social interest discovery}}, url = {http://scholar.google.de/scholar.bib?q=info:7d-UGUbg_JYJ:scholar.google.com/&output=citation&hl=de&ct=citation&cd=0}, year = 2008 } @article{schaeffer2007graph, author = {Schaeffer, S.E.}, interhash = {24c764dba3c31f76ced3aa58f1983ed4}, intrahash = {d16e5e8575fcb716101fcd2762f2a2a1}, journal = {Computer Science Review}, number = 1, pages = {27--64}, publisher = {Elsevier}, title = {{Graph clustering}}, url = {http://scholar.google.de/scholar.bib?q=info:-vQhplU2EFYJ:scholar.google.com/&output=citation&hl=de&as_sdt=2000&ct=citation&cd=0}, volume = 1, year = 2007 } @article{h1987knowledge, abstract = {Conceptual clustering is an important way of summarizing and explaining data. However, the recent formulation of this paradigm has allowed little exploration of conceptual clustering as a means of improving performance. Furthermore, previous work in conceptual clustering has not explicitly dealt with constraints imposed by real world environments. This article presents COBWEB, a conceptual clustering system that organizes data so as to maximize inference ability. Additionally, COBWEB is incremental and computationally economical, and thus can be flexibly applied in a variety of domains. ER -}, author = {Fisher, Douglas H.}, interhash = {36208ac57cc67951de85bd99b8fb8647}, intrahash = {0edbe48f91025efea4af0a1a62433e42}, journal = {Machine Learning}, month = {#sep#}, number = 2, pages = {139--172}, title = {Knowledge Acquisition Via Incremental Conceptual Clustering}, url = {http://dx.doi.org/10.1023/A:1022852608280}, volume = 2, year = 1987 } @article{Jiang20091479, abstract = {Exploring recent developments in spectral clustering, we discovered that relaxing a spectral reformulation of Newman's Q-measure (a measure that may guide the search for-and help to evaluate the fit of - community structures in networks) yields a new framework for use in detecting fuzzy communities and identifying so-called unstable nodes. In this note, we present and illustrate this approach, which we expect to further enhance our understanding of the intrinsic structure of networks and of network-based clustering procedures. We applied a variation of the fuzzy k-means algorithm, an instance of our framework, to two social networks. The computational results illustrate its potential.}, author = {Jiang, Jeffrey Q. and Dress, Andreas W.M. and Yang, Genke}, doi = {10.1016/j.aml.2009.02.005}, interhash = {08fe9886403ff8d2564fca447aef8172}, intrahash = {d9a603d42a7379d13d8a04404bb951cc}, issn = {0893-9659}, journal = {Applied Mathematics Letters}, number = 9, pages = {1479 - 1482}, title = {A spectral clustering-based framework for detecting community structures in complex networks}, url = {http://www.sciencedirect.com/science/article/B6TY9-4W6XYH5-5/2/693a9ed19784792496c83e96b4fa828b}, volume = 22, year = 2009 } @article{brandes2008modularity, abstract = {Modularity is a recently introduced quality measure for graph clusterings. It has immediately received considerable attention in several disciplines, particularly in the complex systems literature, although its properties are not well understood. We study the problem of finding clusterings with maximum modularity, thus providing theoretical foundations for past and present work based on this measure. More precisely, we prove the conjectured hardness of maximizing modularity both in the general case and with the restriction to cuts and give an Integer Linear Programming formulation. This is complemented by first insights into the behavior and performance of the commonly applied greedy agglomerative approach.}, author = {Brandes, U. and Delling, D. and Gaertler, M. and Gorke, R. and Hoefer, M. and Nikoloski, Z. and Wagner, D.}, doi = {10.1109/TKDE.2007.190689}, interhash = {b7195d25a851617a48d4f15bef5ad789}, intrahash = {9e2e5f9d06d2f83be98083175560c835}, issn = {1041-4347}, journal = {Knowledge and Data Engineering, IEEE Transactions on}, month = {feb. }, number = 2, pages = {172 -188}, title = {On Modularity Clustering}, url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=4358966&tag=1}, volume = 20, year = 2008 }