% Bibliography on graph/spectral clustering, folksonomies and online social
% networks (BibSonomy/DBLP-style export, cleaned up).
%
% Conventions used in this file:
%   * one field per line; bibliographic fields first, then identifiers/URLs,
%     then tool-specific fields (interhash, intrahash, vgwort, ...);
%   * page ranges use "--"; months use the three-letter macros;
%   * "doi" holds the bare DOI (no resolver prefix);
%   * non-ASCII characters are written as LaTeX escapes so the file also
%     works with classic 8-bit BibTeX;
%   * "timestamp" holds the database record date (NOT the publication date),
%     so that it can never override "year".
% Citation keys are unchanged so existing \cite commands keep working.

% Key keeps the historical "Commmunities" misspelling on purpose; only the
% title has been corrected.
@inproceedings{Detecting_Commmunities_via_Simultaneous_Clustering_of_Graphs_and_Folksonomies,
  author    = {Java, Akshay and Joshi, Anupam and Finin, Tim},
  title     = {Detecting Communities via Simultaneous Clustering of Graphs and Folksonomies},
  booktitle = {WebKDD 2008 Workshop on Web Mining and Web Usage Analysis},
  month     = aug,
  year      = 2008,
  note      = {To Appear},
  interhash = {acfec953843b168e61e2e167e29b4c3d},
  intrahash = {645abd6b3191a2a6e844d7542651ed1c}
}

@inproceedings{conf/kdd/ChiSZHT07,
  author    = {Chi, Yun and Song, Xiaodan and Zhou, Dengyong and Hino, Koji and Tseng, Belle L.},
  title     = {Evolutionary spectral clustering by incorporating temporal smoothness},
  booktitle = {KDD},
  editor    = {Berkhin, Pavel and Caruana, Rich and Wu, Xindong},
  pages     = {153--162},
  publisher = {ACM},
  year      = 2007,
  crossref  = {conf/kdd/2007},
  isbn      = {978-1-59593-609-7},
  doi       = {10.1145/1281192.1281212},
  ee        = {http://doi.acm.org/10.1145/1281192.1281212},
  url       = {http://dblp.uni-trier.de/db/conf/kdd/kdd2007.html#ChiSZHT07},
  timestamp = {2007-08-23},
  interhash = {542ce3968b0d75048000f35669a7fb83},
  intrahash = {0829ef077986e88540a96bd8ba154d86}
}

@inproceedings{conf/icml/WagstaffCRS01,
  author    = {Wagstaff, Kiri and Cardie, Claire and Rogers, Seth and Schr{\"o}dl, Stefan},
  title     = {Constrained {K-means} Clustering with Background Knowledge},
  booktitle = {ICML},
  editor    = {Brodley, Carla E. and Danyluk, Andrea Pohoreckyj},
  pages     = {577--584},
  publisher = {Morgan Kaufmann},
  year      = 2001,
  crossref  = {conf/icml/2001},
  isbn      = {1-55860-778-1},
  url       = {http://dblp.uni-trier.de/db/conf/icml/icml2001.html#WagstaffCRS01},
  timestamp = {2002-11-27},
  interhash = {10d8a7c9e5b5f9cf0d0848cf8c10f604},
  intrahash = {c0c3565625192ee6748b52d9d4f3b526}
}

@inproceedings{conf/cvpr/ShiM97,
  author    = {Shi, Jianbo and Malik, Jitendra},
  title     = {Normalized Cuts and Image Segmentation},
  booktitle = {CVPR},
  pages     = {731--737},
  year      = 1997,
  ee        = {http://computer.org/proceedings/cvpr/7822/78220731abs.htm},
  url       = {http://dblp.uni-trier.de/db/conf/cvpr/cvpr1997.html#ShiM97},
  interhash = {600345c3af56da066873a30c9971a615},
  intrahash = {bc4607ac2084911e4b1ba23b323f649a}
}

@inproceedings{grahl07conceptualKdml,
  author    = {Grahl, Miranda and Hotho, Andreas and Stumme, Gerd},
  title     = {Conceptual Clustering of Social Bookmark Sites},
  booktitle = {Workshop Proceedings of Lernen -- Wissensentdeckung -- Adaptivit{\"a}t (LWA 2007)},
  editor    = {Hinneburg, Alexander},
  pages     = {50--54},
  publisher = {Martin-Luther-Universit{\"a}t Halle-Wittenberg},
  month     = sep,
  year      = 2007,
  isbn      = {978-3-86010-907-6},
  url       = {http://www.tagora-project.eu/wp-content/2007/06/grahl_iknow07.pdf},
  vgwort    = {14},
  interhash = {9c3bb05456bf11bcd88a1135de51f7d9},
  intrahash = {6d5188d66564fe4ed7386e28868504de}
}

@inproceedings{Begelman2006,
  author    = {Begelman, Grigory and Keller, Philipp and Smadja, Frank},
  title     = {Automated Tag Clustering: Improving search and exploration in the tag space},
  booktitle = {Proceedings of the WWW 2006 Workshop on Collaborative Web Tagging Workshop},
  address   = {Edinburgh},
  month     = may,
  year      = 2006,
  url       = {http://www.rawsugar.com/www2006/taggingworkshopschedule.html},
  pdf       = {http://www.rawsugar.com/www2006/20.pdf},
  timestamp = {2007.04.11},
  interhash = {ffacd9d40f6cba1aa8140f501c2a1802},
  intrahash = {95449b3d4b12e8930d529e1e22d51e04}
}

@inproceedings{mcqueen1967smc,
  author    = {MacQueen, J.},
  title     = {Some Methods for Classification and Analysis of Multivariate Observations},
  booktitle = {Proceedings of the 5th Berkeley Symposium on Mathematical Statistics and Probability},
  editor    = {{Le Cam}, L. M. and Neyman, J.},
  volume    = 1,
  pages     = {281--297},
  publisher = {University of California Press},
  address   = {Berkeley, CA, USA},
  year      = 1967,
  url       = {http://projecteuclid.org/euclid.bsmsp/1200512992},
  interhash = {8d7d4dfe7d3a06b8c9c3c2bb7aa91e28},
  intrahash = {d23dfdff44ca5121fde221604128ab80}
}

@article{cattuto2007,
  author    = {Cattuto, C. and Schmitz, C. and Baldassarri, A. and Servedio, V. D. P. and Loreto, V. and Hotho, A. and Grahl, M. and Stumme, G.},
  title     = {Network Properties of Folksonomies},
  journal   = {AI Communications},
  volume    = 20,
  number    = 4,
  pages     = {245--262},
  year      = 2007,
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/2007/aicomm_2007_folksonomy_clustering.pdf},
  vgwort    = {67},
  interhash = {fc5f2df61d28bc99b7e15029da125588},
  intrahash = {d87e198a6d564ae8a8fe151e0a96fa0f}
}

@article{partitioning89,
  author    = {Pothen, A. and Simon, H. D. and Liou, K. P.},
  title     = {Partitioning Sparse Matrices with Eigenvectors of Graphs},
  journal   = {SIAM Journal on Matrix Analysis and Applications},
  volume    = 11,
  number    = 3,
  pages     = {430--452},
  year      = 1990,
  url       = {http://ntrs.nasa.gov/archive/nasa/casi.ntrs.nasa.gov/19970011963_1997016998.pdf},
  interhash = {2f6596b5093c7d16e3817dadf39b8aa2},
  intrahash = {7444914fc73fc8ec12a67e5e172c34c0}
}

% "note" is a required field for the unpublished entry type; the text below
% is a generic placeholder -- replace it if more precise information exists.
@unpublished{ranade:sus,
  author    = {Ranade, A. G.},
  title     = {Some uses of spectral methods},
  note      = {Unpublished manuscript},
  year      = 2000,
  interhash = {473e92f688426631dd9ccc7639a5e861},
  intrahash = {9f7f1562631792a85f2dd5f5eefbcf4d}
}

% Technical reports take "institution", not "publisher".
@techreport{Spielman:1996,
  author      = {Spielman, Daniel A. and Teng, Shang-Hua},
  title       = {Spectral Partitioning Works: Planar Graphs and Finite Element Meshes},
  institution = {University of California at Berkeley},
  address     = {Berkeley, CA, USA},
  year        = 1996,
  interhash   = {83f3d15605beda920551830ccac3d79a},
  intrahash   = {06b1b19e0a29a145555cb1526716c451}
}

@article{donath1973lbp,
  author    = {Donath, W. E. and Hoffman, A. J.},
  title     = {Lower bounds for the partitioning of graphs},
  journal   = {IBM Journal of Research and Development},
  volume    = 17,
  number    = 5,
  pages     = {420--425},
  year      = 1973,
  interhash = {ff38bdeb46caa114a3efad739319973f},
  intrahash = {7cb789bd22dfa8ccdd2abdd30121dfc9}
}

@inproceedings{Ng01onspectral,
  author    = {Ng, Andrew Y. and Jordan, Michael I. and Weiss, Yair},
  title     = {On spectral clustering: Analysis and an algorithm},
  booktitle = {Advances in Neural Information Processing Systems 14},
  pages     = {849--856},
  publisher = {MIT Press},
  year      = 2001,
  abstract  = {Despite many empirical successes of spectral clustering methods---algorithms that cluster points using eigenvectors of matrices derived from the data---there are several unresolved issues. First, there are a wide variety of algorithms that use the eigenvectors in slightly different ways. Second, many of these algorithms have no proof that they will actually compute a reasonable clustering. In this paper, we present a simple spectral clustering algorithm that can be implemented using a few lines of Matlab. Using tools from matrix perturbation theory, we analyze the algorithm, and give conditions under which it can be expected to do well. We also show surprisingly good experimental results on a number of challenging clustering problems.},
  interhash = {b72c97e659127fc653a0d51143d85b0c},
  intrahash = {7485849e42418ee5ceefb45dc6eb603c}
}

% Book chapter (the DOI points into a Springer book), so it is typed as a
% contribution to a collection rather than a journal article.
% NOTE(review): "keyhere" looks like a placeholder key; it is kept because
% documents may already cite it.
@incollection{keyhere,
  author    = {Cheng, Xu and Dale, Cameron and Liu, Jiangchuan},
  title     = {Characteristics and Potentials of {YouTube}: A Measurement Study},
  booktitle = {Peer-to-Peer Video},
  pages     = {205--217},
  publisher = {Springer},
  year      = 2008,
  doi       = {10.1007/978-0-387-76450-4_9},
  url       = {http://dx.doi.org/10.1007/978-0-387-76450-4_9},
  abstract  = {Established in 2005, YouTube is one of the fastest-growing websites, and has become one of the most accessed sites in the Internet. It has a significant impact on the Internet traffic distribution, but itself is suffering from severe scalability constraints. Understanding the features of YouTube and similar video sharing sites is thus crucial to network traffic engineering and to sustainable development of this new generation of services.},
  interhash = {472b1063972e934e1f9ab675b5896a77},
  intrahash = {46d32b77eec4c8ffa4c8f469131a95b1}
}

% NOTE(review): the original record was typed as a journal article but had no
% journal, volume or pages. It is typed as misc until the venue is verified.
% Presumably this is the ACM WPES 2005 workshop paper of the same title, in
% which case the year should be 2005 -- TODO confirm before changing.
@misc{gross:ira,
  author    = {Gross, R. and Acquisti, A.},
  title     = {Information revelation and privacy in online social networks},
  year      = 2008,
  interhash = {19bb9c028aa5074819b2e2ab1aa90824},
  intrahash = {967d9adc2e573348524ac6dbe09b42fe}
}

@inproceedings{1435498,
  author    = {Maia, Marcelo and Almeida, Jussara and Almeida, Virg{\'\i}lio},
  title     = {Identifying user behavior in online social networks},
  booktitle = {SocialNets '08: Proceedings of the 1st workshop on Social network systems},
  pages     = {1--6},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Glasgow, Scotland},
  year      = 2008,
  isbn      = {978-1-60558-124-8},
  doi       = {10.1145/1435497.1435498},
  url       = {http://portal.acm.org/citation.cfm?id=1435497.1435498},
  interhash = {c8510afc8df91d8a87bfff0fdacc5cab},
  intrahash = {21aa1a6e2e63cb7c23a29130e69b74af}
}

@inproceedings{Ester1996,
  author    = {Ester, Martin and Kriegel, Hans-Peter and Sander, J{\"o}rg and Xu, Xiaowei},
  title     = {A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases with Noise},
  booktitle = {Proc. of 2nd International Conference on Knowledge Discovery and Data Mining (KDD-96)},
  pages     = {226--231},
  year      = 1996,
  file      = {:KDD96-037.pdf:PDF},
  interhash = {ba33e4d6b4e5b26bd9f543f26b7d250a},
  intrahash = {2f9e50f0a003c4d3067cab2b6fa47fe0}
}

@inproceedings{1066236,
  author    = {Zhao, Lizhuang and Zaki, Mohammed J.},
  title     = {{TRICLUSTER}: an effective algorithm for mining coherent clusters in {3D} microarray data},
  booktitle = {SIGMOD '05: Proceedings of the 2005 ACM SIGMOD international conference on Management of data},
  pages     = {694--705},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Baltimore, Maryland},
  year      = 2005,
  isbn      = {1-59593-060-4},
  doi       = {10.1145/1066157.1066236},
  url       = {http://portal.acm.org/citation.cfm?id=1066157.1066236},
  abstract  = {In this paper we introduce a novel algorithm called TRICLUSTER, for mining coherent clusters in three-dimensional (3D) gene expression datasets. TRICLUSTER can mine arbitrarily positioned and overlapping clusters, and depending on different parameter values, it can mine different types of clusters, including those with constant or similar values along each dimension, as well as scaling and shifting expression patterns. TRICLUSTER relies on graph-based approach to mine all valid clusters. For each time slice, i.e., a gene$\times$sample matrix, it constructs the range multigraph, a compact representation of all similar value ranges between any two sample columns. It then searches for constrained maximal cliques in this multigraph to yield the set of bi-clusters for this time slice. Then TRICLUSTER constructs another graph using the biclusters (as vertices) from each time slice; mining cliques from this graph yields the final set of triclusters. Optionally, TRICLUSTER merges/deletes some clusters having large overlaps. We present a useful set of metrics to evaluate the clustering quality, and we show that TRICLUSTER can find significant triclusters in the real microarray datasets.},
  interhash = {a4e66b4d48599fe17da1a0be9da4859d},
  intrahash = {f99143dfa553745fb2e7d7f96a8b4bb7}
}

@article{Boley97principaldirection,
  author    = {Boley, Daniel},
  title     = {Principal Direction Divisive Partitioning},
  journal   = {Data Mining and Knowledge Discovery},
  volume    = 2,
  pages     = {325--344},
  year      = 1997,
  abstract  = {We propose a new algorithm capable of partitioning a set of documents or other samples based on an embedding in a high dimensional Euclidean space (i.e. in which every document is a vector of real numbers). The method is unusual in that it is divisive, as opposed to agglomerative, and operates by repeatedly splitting clusters into smaller clusters. The splits are not based on any distance or similarity measure. The documents are assembled in to a matrix which is very sparse. It is this sparsity that permits the algorithm to be very efficient. The performance of the method is illustrated with a set of text documents obtained from the World Wide Web. Some possible extensions are proposed for further investigation.},
  interhash = {281afd06bd3e21ec3ef212da4ec18ee0},
  intrahash = {bca740460f14035af773f665887b6fa4}
}

@inproceedings{Approximating2008Java,
  author    = {Java, Akshay and Joshi, Anupam and Finin, Tim},
  title     = {Approximating the Community Structure of the Long Tail},
  booktitle = {Proceedings of the Second International Conference on Weblogs and Social Media (ICWSM 2008)},
  publisher = {AAAI Press},
  year      = 2008,
  url       = {http://ebiquity.umbc.edu/paper/html/id/381/Approximating-the-Community-Structure-of-the-Long-Tail},
  abstract  = {In many social media applications, a small fraction of the members are highly linked while most are sparsely connected to the network. Such a skewed distribution is sometimes referred to as the ``long tail''. Popular applications like meme trackers and content aggregators mine for information from only the popular blogs located at the head of this curve. On the other hand, the long tail contains large volumes of interesting information and niches. The question we address in this work is how best to approximate the community membership of entities in the long tail using only a small percentage of the entire graph structure. Our technique utilizes basic linear algebra manipulations and spectral methods. It has the advantage of quickly and efficiently finding a reasonable approximation of the community structure of the overall network. Such a method has significant applications in blog analysis engines as well as social media monitoring tools in general.},
  interhash = {ede357e110fee8803dc181d262f30087},
  intrahash = {386f36679c111f30e37ced272d5b355c}
}