% Bibliography: community detection and clustering in graphs, social
% networks and folksonomies.
%
% Conventions used in this file
%   * One field per line; fields are kept in alphabetical order.
%   * month uses the predefined three-letter macros (jan ... dec), unbraced.
%   * pages uses a double hyphen for ranges; article numbers keep their
%     leading zero and are therefore braced.
%   * doi holds the bare identifier without any resolver prefix.
%   * interhash / intrahash / ee / timestamp and similar fields are
%     bookkeeping fields that standard styles ignore; they are kept verbatim.
%   * The file is UTF-8 encoded (names such as Schrödl and Jäschke are
%     stored directly).
%   * Text outside entries is ignored by BibTeX, which is why these notes
%     can live here. Do not write an at-sign inside such notes.

% The citation key keeps its historical spelling (three m's) so existing
% cite commands continue to resolve; only the title spelling is corrected.
@inproceedings{Detecting_Commmunities_via_Simultaneous_Clustering_of_Graphs_and_Folksonomies,
  author    = {Java, Akshay and Joshi, Anupam and Finin, Tim},
  booktitle = {WebKDD 2008 Workshop on Web Mining and Web Usage Analysis},
  interhash = {acfec953843b168e61e2e167e29b4c3d},
  intrahash = {645abd6b3191a2a6e844d7542651ed1c},
  month     = aug,
  note      = {To Appear},
  title     = {Detecting Communities via Simultaneous Clustering of Graphs and Folksonomies},
  year      = {2008},
}

% NOTE(review): the crossref target conf/kdd/2007 is not defined in the
% visible part of this file -- confirm it exists (and, for classic BibTeX,
% that it appears after this entry).
% The former date field held a record timestamp, not the publication date,
% so it is stored as timestamp to keep biblatex from treating it as the
% publication date.
@inproceedings{conf/kdd/ChiSZHT07,
  author    = {Chi, Yun and Song, Xiaodan and Zhou, Dengyong and Hino, Koji and Tseng, Belle L.},
  booktitle = {KDD},
  crossref  = {conf/kdd/2007},
  doi       = {10.1145/1281192.1281212},
  editor    = {Berkhin, Pavel and Caruana, Rich and Wu, Xindong},
  ee        = {http://doi.acm.org/10.1145/1281192.1281212},
  interhash = {542ce3968b0d75048000f35669a7fb83},
  intrahash = {0829ef077986e88540a96bd8ba154d86},
  isbn      = {978-1-59593-609-7},
  pages     = {153--162},
  publisher = {ACM},
  timestamp = {2007-08-23},
  title     = {Evolutionary spectral clustering by incorporating temporal smoothness},
  url       = {http://dblp.uni-trier.de/db/conf/kdd/kdd2007.html#ChiSZHT07},
  year      = {2007},
}

% pages is a six-digit article number; the leading zero is significant.
@article{newman2006fcs,
  author    = {Newman, M. E. J.},
  interhash = {5003bcb34d28e1e4bc301fafb9a12c72},
  intrahash = {090a24e34da3d0ab3d14d61dd3ad3285},
  journal   = {Physical Review E},
  number    = {3},
  pages     = {036104},
  publisher = {APS},
  title     = {Finding community structure in networks using the eigenvectors of matrices},
  volume    = {74},
  year      = {2006},
}

% NOTE(review): the crossref target conf/icml/2001 is not defined in the
% visible part of this file -- confirm it exists.
% The former date field (2002-11-27) contradicted year 2001; it is a record
% timestamp and is stored as such.
@inproceedings{conf/icml/WagstaffCRS01,
  author    = {Wagstaff, Kiri and Cardie, Claire and Rogers, Seth and Schrödl, Stefan},
  booktitle = {ICML},
  crossref  = {conf/icml/2001},
  editor    = {Brodley, Carla E. and Danyluk, Andrea Pohoreckyj},
  interhash = {10d8a7c9e5b5f9cf0d0848cf8c10f604},
  intrahash = {c0c3565625192ee6748b52d9d4f3b526},
  isbn      = {1-55860-778-1},
  pages     = {577--584},
  publisher = {Morgan Kaufmann},
  timestamp = {2002-11-27},
  title     = {Constrained {K-means} Clustering with Background Knowledge},
  url       = {http://dblp.uni-trier.de/db/conf/icml/icml2001.html#WagstaffCRS01},
  year      = {2001},
}

@inproceedings{conf/cvpr/ShiM97,
  author    = {Shi, Jianbo and Malik, Jitendra},
  booktitle = {CVPR},
  ee        = {http://computer.org/proceedings/cvpr/7822/78220731abs.htm},
  interhash = {600345c3af56da066873a30c9971a615},
  intrahash = {bc4607ac2084911e4b1ba23b323f649a},
  pages     = {731--737},
  title     = {Normalized Cuts and Image Segmentation},
  url       = {http://dblp.uni-trier.de/db/conf/cvpr/cvpr1997.html#ShiM97},
  year      = {1997},
}

@inproceedings{grahl07conceptualKdml,
  author    = {Grahl, Miranda and Hotho, Andreas and Stumme, Gerd},
  booktitle = {Workshop Proceedings of Lernen - Wissensentdeckung - Adaptivität (LWA 2007)},
  editor    = {Hinneburg, Alexander},
  interhash = {9c3bb05456bf11bcd88a1135de51f7d9},
  intrahash = {6d5188d66564fe4ed7386e28868504de},
  isbn      = {978-3-86010-907-6},
  month     = sep,
  pages     = {50--54},
  publisher = {Martin-Luther-Universität Halle-Wittenberg},
  title     = {Conceptual Clustering of Social Bookmark Sites},
  url       = {http://www.tagora-project.eu/wp-content/2007/06/grahl_iknow07.pdf},
  vgwort    = {14},
  year      = {2007},
}

% Begelman2006 and schmitz06-inducing appeared at the same workshop; both
% use the same booktitle and address spelling.
@inproceedings{Begelman2006,
  address   = {Edinburgh, Scotland},
  author    = {Begelman, Grigory and Keller, Philipp and Smadja, Frank},
  booktitle = {Proceedings of the Collaborative Web Tagging Workshop at WWW 2006},
  interhash = {ffacd9d40f6cba1aa8140f501c2a1802},
  intrahash = {95449b3d4b12e8930d529e1e22d51e04},
  month     = may,
  pdf       = {http://www.rawsugar.com/www2006/20.pdf},
  timestamp = {2007.04.11},
  title     = {Automated Tag Clustering: Improving search and exploration in the tag space},
  url       = {http://www.rawsugar.com/www2006/taggingworkshopschedule.html},
  year      = {2006},
}

@inproceedings{schmitz06-inducing,
  abstract         = {In this paper, we describe some promising initial results in inducing ontology from the Flickr tag vocabulary, using a subsumption-based model. We describe the utility of faceted ontology as a supplement to a tagging system and present our model and results. We propose a revised, probabilistic model using seed ontologies to induce faceted ontology, and describe how the model can integrate into the logistics of tagging communities.},
  address          = {Edinburgh, Scotland},
  author           = {Schmitz, Patrick},
  booktitle        = {Proceedings of the Collaborative Web Tagging Workshop at WWW 2006},
  interhash        = {1335f4ef87f951e6edf4fd94f885d3a2},
  intrahash        = {f913a4ad3a27582ae5d4d269fe38dc5c},
  lastdatemodified = {2006-10-12},
  lastname         = {Schmitz},
  month            = may,
  own              = {own},
  pdf              = {schmitz06-inducing.pdf},
  read             = {readnext},
  title            = {Inducing Ontology from {Flickr} Tags},
  url              = {http://www.citeulike.org/user/ryanshaw/article/740688},
  year             = {2006},
}

@article{berry1995ula,
  author    = {Berry, M. W. and Dumais, S. T. and O'Brien, G. W.},
  interhash = {500361a1ef20600fb86dc5c94c61d276},
  intrahash = {f962590e6aa2ab97e21861051e705b9a},
  journal   = {SIAM Review},
  number    = {4},
  pages     = {573--595},
  publisher = {Society for Industrial and Applied Mathematics},
  title     = {Using Linear Algebra for Intelligent Information Retrieval},
  volume    = {37},
  year      = {1995},
}

% NOTE(review): the running-time expression in the abstract was exported as
% plain text "Tri2"; it is restored as a square on the assumption that the
% superscript was flattened -- confirm against the paper.
@inproceedings{1348552,
  abstract  = {Recent years have seen that WWW is becoming a flourishing social media which enables individuals to easily share opinions, experiences and expertise at the push of a single button. With the pervasive usage of instant messaging systems and the fundamental shift in the ease of publishing content, social network researchers and graph theory researchers are now concerned with inferring community structures by analyzing the linkage patterns among individuals and web pages. Although the investigation of community structures has motivated many diverse algorithms, most of them are unsuitable for large-scale social networks because of the computational cost. Moreover, in addition to identify the possible community structures, how to define and explain the discovered communities is also significant in many practical scenarios. In this paper, we present the algorithm ComTector(Community DeTector) which is more efficient for the community detection in large-scale social networks based on the nature of overlapping communities in the real world. This algorithm does not require any priori knowledge about the number or the original division of the communities. Because real networks are often large sparse graphs, its running time is thus {$O(C \times \mathit{Tri}^2)$}, where C is the number of the detected communities and Tri is the number of the triangles in the given network for the worst case. Then we propose a general naming method by combining the topological information with the entity attributes to define the discovered communities. With respected to practical applications, ComTector is challenged with several real life networks including the Zachary Karate Club, American College Football, Scientific Collaboration, and Telecommunications Call networks. Experimental results show that this algorithm can extract meaningful communities that are agreed with both of the objective facts and our intuitions.},
  address   = {New York, NY, USA},
  author    = {Du, Nan and Wu, Bin and Pei, Xin and Wang, Bai and Xu, Liutong},
  booktitle = {WebKDD/SNA-KDD '07: Proceedings of the 9th WebKDD and 1st SNA-KDD 2007 workshop on Web mining and social network analysis},
  doi       = {10.1145/1348549.1348552},
  interhash = {dfb904415a5c8afa2bbcf6cf259ac66d},
  intrahash = {67447dbc5492c0c7febbb62a7286940a},
  isbn      = {978-1-59593-848-0},
  location  = {San Jose, California},
  pages     = {16--25},
  publisher = {ACM},
  title     = {Community detection in large-scale social networks},
  url       = {http://portal.acm.org/citation.cfm?id=1348549.1348552&coll=GUIDE&dl=GUIDE&type=series&idx=SERIES939&part=series&WantType=Proceedings&title=KDD&CFID=17695029&CFTOKEN=22803053},
  year      = {2007},
}

@inproceedings{347121,
  address   = {New York, NY, USA},
  author    = {Flake, Gary William and Lawrence, Steve and Giles, C. Lee},
  booktitle = {KDD '00: Proceedings of the sixth ACM SIGKDD international conference on Knowledge discovery and data mining},
  doi       = {10.1145/347090.347121},
  interhash = {e74be2040258b24f3b2e03466931a9da},
  intrahash = {b37bffe4a02dace7c303d663fd24182c},
  isbn      = {1-58113-233-6},
  location  = {Boston, Massachusetts, United States},
  pages     = {150--160},
  publisher = {ACM Press},
  title     = {Efficient identification of {Web} communities},
  year      = {2000},
}

% The first three author names are written as in entry 347121 above.
@article{flake2002soa,
  author    = {Flake, Gary William and Lawrence, Steve and Giles, C. Lee and Coetzee, F. M.},
  interhash = {e82166b28b84e17c05b08a864ee0c7e4},
  intrahash = {8355745989d38aed8675ca75cc0b3553},
  journal   = {Computer},
  number    = {3},
  pages     = {66--70},
  title     = {Self-organization and identification of {Web} communities},
  volume    = {35},
  year      = {2002},
}

% Symposium paper: the venue belongs in booktitle of an inproceedings entry
% rather than in journal of an article entry. The key is unchanged.
@inproceedings{1214919,
  author    = {Huang, Xiaodi and Lai, Wei},
  booktitle = {Proceedings of the Seventh International Database Engineering and Applications Symposium},
  interhash = {7e7bac6733229f984063ea2f7eaaf400},
  intrahash = {4da3949f425268426099fff3a45e6025},
  issn      = {1098-8068},
  month     = jul,
  pages     = {123--128},
  title     = {Identification of clusters in the {Web} graph based on link topology},
  year      = {2003},
}

@inproceedings{mitzlaff2010visit,
  abstract  = {The ongoing spread of online social networking and sharing sites has reshaped the way how people interact with each other. Analyzing the relatedness of different users within the resulting large populations of these systems plays an important role for tasks like user recommendation or community detection. Algorithms in these fields typically face the problem that explicit user relationships (like friend lists) are often very sparse. Surprisingly, implicit evidences (like click logs) of user relations have hardly been considered to this end. Based on our long-time experience with running BibSonomy [4], we identify in this paper different evidence networks of user relationships in our system. We broadly classify each network based on whether the links are explicitly established by the users (e.g., friendship or group membership) or accrue implicitly in the running system (e.g., when user u copies an entry of user v). We systematically analyze structural properties of these networks and whether topological closeness (in terms of the length of shortest paths) coincides with semantic similarity between users.},
  address   = {New York, NY, USA},
  author    = {Mitzlaff, Folke and Benz, Dominik and Stumme, Gerd and Hotho, Andreas},
  booktitle = {HT '10: Proceedings of the 21st ACM Conference on Hypertext and Hypermedia},
  doi       = {10.1145/1810617.1810664},
  interhash = {5584c4c57fcd8eb4663df8b114bcf09c},
  intrahash = {6628bf43e3834ba147a22992f2f534e9},
  isbn      = {978-1-4503-0041-4},
  location  = {Toronto, Ontario, Canada},
  pages     = {265--270},
  publisher = {ACM},
  title     = {Visit me, click me, be my friend: an analysis of evidence networks of user relationships in {BibSonomy}},
  url       = {http://portal.acm.org/citation.cfm?id=1810617.1810664},
  year      = {2010},
}

% NOTE(review): publisher is inferred from the url host and the DOI prefix;
% confirm the exact imprint name.
% The author field previously carried invisible zero-width characters after
% each surname; they are removed so the names sort and match correctly.
@book{noauthororeditoryahoo,
  abstract  = {The past decade has witnessed the emergence of participatory Web and social media, bringing people together in many creative ways. Millions of users are playing, tagging, working, and socializing online, demonstrating new forms of collaboration, communication, and intelligence that were hardly imaginable just a short time ago. Social media also helps reshape business models, sway opinions and emotions, and opens up numerous possibilities to study human interaction and collective behavior in an unparalleled scale. This lecture, from a data mining perspective, introduces characteristics of social media, reviews representative tasks of computing with social media, and illustrates associated challenges. It introduces basic concepts, presents state-of-the-art algorithms with easy-to-understand examples, and recommends effective evaluation methods. In particular, we discuss graph-based community detection techniques and many important extensions that handle dynamic, heterogeneous networks in social media. We also demonstrate how discovered patterns of communities can be used for social media mining. The concepts, algorithms, and methods presented in this lecture can help harness the power of social media and support building socially-intelligent systems. This book is an accessible introduction to the study of \emph{community detection and mining in social media}. It is an essential reading for students, researchers, and practitioners in disciplines and applications where social media is a key source of data that piques our curiosity to understand, manage, innovate, and excel. This book is supported by additional materials, including lecture slides, the complete set of figures, key references, some toy data sets used in the book, and the source code of representative algorithms. The readers are encouraged to visit the book website for the latest information. Table of Contents: Social Media and Social Computing / Nodes, Ties, and Influence / Community Detection and Evaluation / Communities in Heterogeneous Networks / Social Media Mining},
  author    = {Tang, Lei and Liu, Huan},
  doi       = {10.2200/S00298ED1V01Y201009DMK003},
  interhash = {717f8b976eec1dc934a3b84675456f25},
  intrahash = {c4e1fa6bf2d52a237e5557640d87c970},
  publisher = {Morgan \& Claypool Publishers},
  title     = {Community Detection and Mining in Social Media},
  url       = {http://www.morganclaypool.com/doi/abs/10.2200/S00298ED1V01Y201009DMK003},
  year      = {2010},
}

@article{Rosvall29012008,
  abstract  = {To comprehend the multipartite organization of large-scale biological and social systems, we introduce an information theoretic approach that reveals community structure in weighted and directed networks. We use the probability flow of random walks on a network as a proxy for information flows in the real system and decompose the network into modules by compressing a description of the probability flow. The result is a map that both simplifies and highlights the regularities in the structure and their relationships. We illustrate the method by making a map of scientific communication as captured in the citation patterns of >6,000 journals. We discover a multicentric organization with fields that vary dramatically in size and degree of integration into the network of science. Along the backbone of the network—including physics, chemistry, molecular biology, and medicine—information flows bidirectionally, but the map reveals a directional pattern of citation from the applied fields to the basic sciences.},
  author    = {Rosvall, Martin and Bergstrom, Carl T.},
  doi       = {10.1073/pnas.0706851105},
  eprint    = {http://www.pnas.org/content/105/4/1118.full.pdf+html},
  interhash = {8192f8db9fce0417034311e81a477838},
  intrahash = {ffe2c7ca3a20430f60dfd138e72df5f5},
  journal   = {Proceedings of the National Academy of Sciences},
  number    = {4},
  pages     = {1118--1123},
  title     = {Maps of random walks on complex networks reveal community structure},
  url       = {http://www.pnas.org/content/105/4/1118.abstract},
  volume    = {105},
  year      = {2008},
}

% German title: nouns are braced so that sentence-casing styles do not
% lowercase them.
@inproceedings{jaeschke2006wege,
  abstract  = {Ein wichtiger Baustein des neu entdeckten World Wide Web -- des "`Web 2.0"' -- stellen Folksonomies dar. In diesen Systemen können Benutzer gemeinsam Ressourcen verwalten und mit Schlagwörtern versehen. Die dadurch entstehenden begrifflichen Strukturen stellen ein interessantes Forschungsfeld dar. Dieser Artikel untersucht Ansätze und Wege zur Entdeckung und Strukturierung von Nutzergruppen ("Communities") in Folksonomies.},
  address   = {Halle-Wittenberg},
  author    = {Jäschke, Robert and Hotho, Andreas and Schmitz, Christoph and Stumme, Gerd},
  booktitle = {Proc. 18. Workshop Grundlagen von Datenbanken},
  editor    = {Braß, Stefan and Hinneburg, Alexander},
  interhash = {59224b5889a24108434a9b5ecc6b0887},
  intrahash = {2b6be3bd5daee7119973fcf69909956f},
  month     = jun,
  pages     = {80--84},
  publisher = {Martin-Luther-Universität},
  title     = {Wege zur {Entdeckung} von {Communities} in {Folksonomies}},
  url       = {http://www.kde.cs.uni-kassel.de/stumme/papers/2006/jaeschke2006wege.pdf},
  year      = {2006},
}

% Publication status lives in note; volume is left unset until the article
% is assigned to one.
@article{Atzmueller:12c,
  author    = {Atzmueller, Martin},
  interhash = {0b20c1d53d5df05326d594726273c2fb},
  intrahash = {7b616e64994893a2aad95b5ad95db662},
  journal   = {WIREs: Data Mining and Knowledge Discovery},
  note      = {In Press},
  title     = {Mining Social Media: Key Players, Sentiments, and Communities},
  year      = {2012},
}

@article{fortunato2010community,
  abstract  = {The modern science of networks has brought significant advances to our understanding of complex systems. One of the most relevant features of graphs representing real systems is community structure, or clustering, i.e. the organization of vertices in clusters, with many edges joining vertices of the same cluster and comparatively few edges joining vertices of different clusters. Such clusters, or communities, can be considered as fairly independent compartments of a graph, playing a similar role like, e.g., the tissues or the organs in the human body. Detecting communities is of great importance in sociology, biology and computer science, disciplines where systems are often represented as graphs. This problem is very hard and not yet satisfactorily solved, despite the huge effort of a large interdisciplinary community of scientists working on it over the past few years. We will attempt a thorough exposition of the topic, from the definition of the main elements of the problem, to the presentation of most methods developed, with a special focus on techniques designed by statistical physicists, from the discussion of crucial issues like the significance of clustering and how methods should be tested and compared against each other, to the description of applications to real networks.},
  author    = {Fortunato, Santo},
  doi       = {10.1016/j.physrep.2009.11.002},
  interhash = {9f6089e942903fc65309f77744c88109},
  intrahash = {fddddfb8990e8ea824c8c4b62244f737},
  issn      = {0370-1573},
  journal   = {Physics Reports},
  number    = {3--5},
  pages     = {75--174},
  title     = {Community detection in graphs},
  url       = {http://www.sciencedirect.com/science/article/pii/S0370157309002841},
  volume    = {486},
  year      = {2010},
}

% pages is a six-digit article number; the leading zero is significant.
@article{newman2004finding,
  author    = {Newman, M. E. J. and Girvan, M.},
  doi       = {10.1103/PhysRevE.69.026113},
  interhash = {b9145040e35ccb4d2a0ce18105e64ff4},
  intrahash = {1dbc30a1818aa74973f387162e485443},
  journal   = {Physical Review E},
  month     = feb,
  number    = {2},
  numpages  = {15},
  pages     = {026113},
  publisher = {American Physical Society},
  title     = {Finding and evaluating community structure in networks},
  url       = {http://link.aps.org/doi/10.1103/PhysRevE.69.026113},
  volume    = {69},
  year      = {2004},
}