@comment{
  Clustering and community-detection references.
  Conventions used below: one field per line, fields in alphabetical order,
  month as a three-letter macro, page ranges with a double hyphen, bare DOIs,
  and LaTeX escapes instead of raw non-ASCII characters.
  interhash/intrahash/comment/vgwort are kept verbatim; they look like
  export-tool bookkeeping fields (TODO confirm before pruning).
}

@article{Jiang20091479,
  abstract = {Exploring recent developments in spectral clustering, we discovered that relaxing a spectral reformulation of Newman's Q-measure (a measure that may guide the search for --- and help to evaluate the fit of --- community structures in networks) yields a new framework for use in detecting fuzzy communities and identifying so-called unstable nodes. In this note, we present and illustrate this approach, which we expect to further enhance our understanding of the intrinsic structure of networks and of network-based clustering procedures. We applied a variation of the fuzzy k-means algorithm, an instance of our framework, to two social networks. The computational results illustrate its potential.},
  author = {Jiang, Jeffrey Q. and Dress, Andreas W. M. and Yang, Genke},
  doi = {10.1016/j.aml.2009.02.005},
  interhash = {08fe9886403ff8d2564fca447aef8172},
  intrahash = {d9a603d42a7379d13d8a04404bb951cc},
  issn = {0893-9659},
  journal = {Applied Mathematics Letters},
  number = {9},
  pages = {1479--1482},
  title = {A spectral clustering-based framework for detecting community structures in complex networks},
  url = {http://www.sciencedirect.com/science/article/B6TY9-4W6XYH5-5/2/693a9ed19784792496c83e96b4fa828b},
  volume = {22},
  year = {2009},
}

@article{brandes2008modularity,
  abstract = {Modularity is a recently introduced quality measure for graph clusterings. It has immediately received considerable attention in several disciplines, particularly in the complex systems literature, although its properties are not well understood. We study the problem of finding clusterings with maximum modularity, thus providing theoretical foundations for past and present work based on this measure. More precisely, we prove the conjectured hardness of maximizing modularity both in the general case and with the restriction to cuts and give an Integer Linear Programming formulation. This is complemented by first insights into the behavior and performance of the commonly applied greedy agglomerative approach.},
  author = {Brandes, U. and Delling, D. and Gaertler, M. and G{\"o}rke, R. and Hoefer, M. and Nikoloski, Z. and Wagner, D.},
  doi = {10.1109/TKDE.2007.190689},
  interhash = {b7195d25a851617a48d4f15bef5ad789},
  intrahash = {9e2e5f9d06d2f83be98083175560c835},
  issn = {1041-4347},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  month = feb,
  number = {2},
  pages = {172--188},
  title = {On Modularity Clustering},
  url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=4358966&tag=1},
  volume = {20},
  year = {2008},
}

@comment{
  NOTE(review): the key of the next entry looks like an export placeholder.
  It is kept unchanged so that existing citations keep resolving.
}

@article{keyhere,
  abstract = {The theory of concept (or Galois) lattices provides a simple and formal approach to conceptual clustering. In this paper we present GALOIS, a system that automates and applies this theory. The algorithm utilized by GALOIS to build a concept lattice is incremental and efficient, each update being done in time at most quadratic in the number of objects in the lattice. Also, the algorithm may incorporate background information into the lattice, and through clustering, extend the scope of the theory. The application we present is concerned with information retrieval via browsing, for which we argue that concept lattices may represent major support structures. We describe a prototype user interface for browsing through the concept lattice of a document-term relation, possibly enriched with a thesaurus of terms. An experimental evaluation of the system performed on a medium-sized bibliographic database shows good retrieval performance and a significant improvement after the introduction of background knowledge.},
  author = {Carpineto, Claudio and Romano, Giovanni},
  doi = {10.1007/BF00058654},
  interhash = {719ac1badf95acafafbd1487d82ae175},
  intrahash = {a53905954aeef0a80ec7424f978bca14},
  journal = {Machine Learning},
  month = aug,
  number = {2},
  pages = {95--122},
  title = {A lattice conceptual clustering system and its application to browsing retrieval},
  url = {http://dx.doi.org/10.1007/BF00058654},
  volume = {24},
  year = {1996},
}

@misc{Leskovec2010,
  abstract = {Detecting clusters or communities in large real-world graphs such as large social or information networks is a problem of considerable interest. In practice, one typically chooses an objective function that captures the intuition of a network cluster as set of nodes with better internal connectivity than external connectivity, and then one applies approximation algorithms or heuristics to extract sets of nodes that are related to the objective function and that ``look like'' good communities for the application of interest. In this paper, we explore a range of network community detection methods in order to compare them and to understand their relative performance and the systematic biases in the clusters they identify. We evaluate several common objective functions that are used to formalize the notion of a network community, and we examine several different classes of approximation algorithms that aim to optimize such objective functions. In addition, rather than simply fixing an objective and asking for an approximation to the best cluster of any size, we consider a size-resolved version of the optimization problem. Considering community quality as a function of its size provides a much finer lens with which to examine community detection algorithms, since objective functions and approximation algorithms often have non-obvious size-dependent behavior.},
  archiveprefix = {arXiv},
  author = {Leskovec, Jure and Lang, Kevin J. and Mahoney, Michael W.},
  eprint = {1004.3539},
  interhash = {0e58de655596b2198f4a7001facd0c32},
  intrahash = {410a9cbea51ea5dd3c56aad26a0e11b2},
  title = {Empirical Comparison of Algorithms for Network Community Detection},
  url = {http://arxiv.org/abs/1004.3539},
  year = {2010},
}

@inproceedings{hotho03ontologies,
  address = {Melbourne, Florida},
  author = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
  booktitle = {Proceedings of the 2003 IEEE International Conference on Data Mining},
  comment = {alpha},
  interhash = {b56c36d6d9c9ca9e6bd236a0f92415a5},
  intrahash = {57a39c81cff1982dbefed529be934bee},
  month = nov,
  note = {Poster},
  pages = {541--544},
  publisher = {IEEE Computer Society},
  title = {Ontologies improve text document clustering},
  url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003ontologies.pdf},
  year = {2003},
}

@techreport{hotho03textclustering,
  abstract = {Text document clustering plays an important role in providing intuitive navigation and browsing mechanisms by organizing large amounts of information into a small number of meaningful clusters. Standard partitional or agglomerative clustering methods efficiently compute results to this end. However, the bag of words representation used for these clustering methods is often unsatisfactory as it ignores relationships between important terms that do not co-occur literally. Also, it is mostly left to the user to find out why a particular partitioning has been achieved, because it is only specified extensionally. In order to deal with the two problems, we integrate background knowledge into the process of clustering text documents. First, we preprocess the texts, enriching their representations by background knowledge provided in a core ontology --- in our application Wordnet. Then, we cluster the documents by a partitional algorithm. Our experimental evaluation on Reuters newsfeeds compares clustering results with pre-categorizations of news. In the experiments, improvements of results by background knowledge compared to the baseline can be shown for many interesting tasks. Second, the clustering partitions the large number of documents to a relatively small number of clusters, which may then be analyzed by conceptual clustering. In our approach, we applied Formal Concept Analysis. Conceptual clustering techniques are known to be too slow for directly clustering several hundreds of documents, but they give an intensional account of cluster results. They allow for a concise description of commonalities and distinctions of different clusters. With background knowledge they even find abstractions like ``food'' (vs. specializations like ``beef'' or ``corn''). Thus, in our approach, partitional clustering reduces first the size of the problem such that it becomes tractable for conceptual clustering, which then facilitates the understanding of the results.},
  author = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
  comment = {alpha},
  institution = {University of Karlsruhe, Institute AIFB},
  interhash = {0bc7c3fc1273355f45c8970a7ea58f97},
  intrahash = {61d58db419af0dbc3681432588219c3d},
  number = {425},
  title = {Text Clustering Based on Background Knowledge},
  type = {Technical Report},
  url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003text.pdf},
  year = {2003},
}

@inproceedings{hotho02conceptualclustering,
  author = {Hotho, A. and Stumme, G.},
  booktitle = {Proc. Fachgruppentreffen Maschinelles Lernen (FGML 2002)},
  comment = {alpha},
  editor = {K{\'o}kai, G. and Zeidler, J.},
  interhash = {3dd3d4ce38d0de0ba8e167f8133cbb3e},
  intrahash = {e253c44552a046fe90236274bcfeab13},
  pages = {37--45},
  title = {Conceptual Clustering of Text Clusters},
  url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2002/FGML02.pdf},
  year = {2002},
}

@inproceedings{hotho03wordnet,
  address = {Toronto},
  author = {Hotho, A. and Staab, S. and Stumme, G.},
  booktitle = {Proc. SIGIR Semantic Web Workshop},
  comment = {alpha},
  interhash = {c2a9a89ce20cef90a1e78d34dc2c2afe},
  intrahash = {04c7d86337d68e4ed9ae637029c43414},
  title = {Wordnet improves text document clustering},
  url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003wordnet.pdf},
  year = {2003},
}

@inproceedings{hotho03explaining,
  abstract = {Common text clustering techniques offer rather poor capabilities for explaining to their users why a particular result has been achieved. They have the disadvantage that they do not relate semantically nearby terms and that they cannot explain how resulting clusters are related to each other. In this paper, we discuss a way of integrating a large thesaurus and the computation of lattices of resulting clusters into common text clustering in order to overcome these two problems. As its major result, our approach achieves an explanation using an appropriate level of granularity at the concept level as well as an appropriate size and complexity of the explaining lattice of resulting clusters.},
  address = {Heidelberg},
  author = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
  booktitle = {Knowledge Discovery in Databases: PKDD 2003, 7th European Conference on Principles and Practice of Knowledge Discovery in Databases},
  comment = {alpha},
  editor = {Lavra{\v{c}}, Nada and Gamberger, Dragan and Blockeel, Hendrik and Todorovski, Ljupco},
  interhash = {cf66183151a5d94a0941ac6d5089ae89},
  intrahash = {53a943b6be4b34cf4e5329d0b58e99f6},
  pages = {217--228},
  publisher = {Springer},
  series = {LNAI},
  title = {Explaining Text Clustering Results using Semantic Structures},
  url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003explaining.pdf},
  volume = {2838},
  year = {2003},
}

@inproceedings{grahl07conceptualKdml,
  author = {Grahl, Miranda and Hotho, Andreas and Stumme, Gerd},
  booktitle = {Workshop Proceedings of Lernen -- Wissensentdeckung -- Adaptivit{\"a}t (LWA 2007)},
  editor = {Hinneburg, Alexander},
  interhash = {9c3bb05456bf11bcd88a1135de51f7d9},
  intrahash = {6d5188d66564fe4ed7386e28868504de},
  isbn = {978-3-86010-907-6},
  month = sep,
  pages = {50--54},
  publisher = {Martin-Luther-Universit{\"a}t Halle-Wittenberg},
  title = {Conceptual Clustering of Social Bookmark Sites},
  url = {http://www.kde.cs.uni-kassel.de/hotho/pub/2007/kdml_recommender_final.pdf},
  vgwort = {14},
  year = {2007},
}

@inproceedings{stumme01conceptualclustering,
  address = {Universit{\"a}t Dortmund 763},
  author = {Stumme, G. and Taouil, R. and Bastide, Y. and Lakhal, L.},
  booktitle = {Proc. GI-Fachgruppentreffen Maschinelles Lernen (FGML'01)},
  editor = {Klinkenberg, R. and R{\"u}ping, S. and Fick, A. and Henze, N. and Herzog, C. and Molitor, R. and Schr{\"o}der, O.},
  interhash = {c99f2ae002435208c58f9244d298a10b},
  intrahash = {f4ec21d5f63dbc213a3a6eae076c4b62},
  month = oct,
  title = {Conceptual Clustering with Iceberg Concept Lattices},
  url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2001/FGML01.pdf},
  year = {2001},
}

@inproceedings{ls_leimeister,
  address = {Valencia, Spain},
  author = {Duennebeil, S. and Sunyaev, A. and Blohm, I. and Leimeister, J. M. and Krcmar, H.},
  booktitle = {3. International Conference on Health Informatics (HealthInf) 2010},
  interhash = {c79ecc24f80f6572f79e40ef06342880},
  intrahash = {6d8d3744dda9624c4ae1b10fed7b2e3e},
  note = {163 (11-10)},
  title = {Do {German} physicians want electronic health services? A characterization of potential adopters and rejectors in {German} ambulatory care},
  url = {http://pubs.wi-kassel.de/wp-content/uploads/2013/03/JML_243.pdf},
  year = {2010},
}

@comment{
  NOTE(review): the original date value (2009-06-28) of the next entry
  contradicted year 2002 and is presumably a record timestamp, so it now
  lives in the timestamp field -- confirm.
}

@inproceedings{conf/sigmod/WangWYY02,
  author = {Wang, Haixun and Wang, Wei and Yang, Jiong and Yu, Philip S.},
  booktitle = {SIGMOD Conference},
  crossref = {conf/sigmod/2002},
  doi = {10.1145/564691.564737},
  editor = {Franklin, Michael J. and Moon, Bongki and Ailamaki, Anastassia},
  ee = {http://doi.acm.org/10.1145/564691.564737},
  interhash = {9da0e61a2ac3ac371edfb251fbbfc2ae},
  intrahash = {5ad941d8f0a06bb5e570e22a8cc58d92},
  isbn = {1-58113-497-5},
  pages = {394--405},
  publisher = {ACM},
  timestamp = {2009-06-28},
  title = {Clustering by pattern similarity in large data sets},
  url = {http://dblp.uni-trier.de/db/conf/sigmod/sigmod2002.html#WangWYY02},
  year = {2002},
}

@inproceedings{1283494,
  address = {Philadelphia, PA, USA},
  author = {Arthur, David and Vassilvitskii, Sergei},
  booktitle = {SODA '07: Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms},
  interhash = {0be633834158a3c9cba959406c3e1964},
  intrahash = {553bbfa74b13c47b4e9c7c0034a8406e},
  isbn = {978-0-898716-24-5},
  location = {New Orleans, Louisiana},
  pages = {1027--1035},
  publisher = {Society for Industrial and Applied Mathematics},
  title = {k-means++: the advantages of careful seeding},
  year = {2007},
}

@article{AnkEtAl99,
  abstract = {Cluster analysis is a primary method for database mining. It is either used as a stand-alone tool to get insight into the distribution of a data set, e.g. to focus further analysis and data processing, or as a preprocessing step for other algorithms operating on the detected clusters. Almost all of the well-known clustering algorithms require input parameters which are hard to determine but have a significant influence on the clustering result. Furthermore, for many real-data sets there does not even exist a global parameter setting for which the result of the clustering algorithm describes the intrinsic clustering structure accurately. We introduce a new algorithm for the purpose of cluster analysis which does not produce a clustering of a data set explicitly; but instead creates an augmented ordering of the database representing its density-based clustering structure. This cluster-ordering contains information which is equivalent to the density-based clusterings corresponding to a broad range of parameter settings. It is a versatile basis for both automatic and interactive cluster analysis. We show how to automatically and efficiently extract not only 'traditional' clustering information (e.g. representative points, arbitrary shaped clusters), but also the intrinsic clustering structure. For medium sized data sets, the cluster-ordering can be represented graphically and for very large data sets, we introduce an appropriate visualization technique. Both are suitable for interactive exploration of the intrinsic clustering structure offering additional insights into the distribution and correlation of the data.},
  address = {New York, NY, USA},
  author = {Ankerst, Mihael and Breunig, Markus M. and Kriegel, Hans-Peter and Sander, J{\"o}rg},
  doi = {10.1145/304181.304187},
  interhash = {7417e17c0e8eec9f1a9f2bc57a476b15},
  intrahash = {86b1a51b501c882f9a4f1cdacca3f7ed},
  issn = {0163-5808},
  journal = {ACM SIGMOD Record},
  number = {2},
  pages = {49--60},
  publisher = {ACM},
  title = {{OPTICS}: Ordering Points to Identify the Clustering Structure},
  url = {http://portal.acm.org/citation.cfm?id=304187},
  volume = {28},
  year = {1999},
}

@inproceedings{bekkerman2005multiway,
  abstract = {We present a novel unsupervised learning scheme that simultaneously clusters variables of several types (e.g., documents, words and authors) based on pairwise interactions between the types, as observed in co-occurrence data. In this scheme, multiple clustering systems are generated aiming at maximizing an objective function that measures multiple pairwise mutual information between cluster variables. To implement this idea, we propose an algorithm that interleaves top-down clustering of some variables and bottom-up clustering of the other variables, with a local optimization correction routine. Focusing on document clustering we present an extensive empirical study of two-way, three-way and four-way applications of our scheme using six real-world datasets including the 20 News-groups (20NG) and the Enron email collection. Our multi-way distributional clustering (MDC) algorithms consistently and significantly outperform previous state-of-the-art information theoretic clustering algorithms.},
  address = {New York, NY, USA},
  author = {Bekkerman, Ron and El-Yaniv, Ran and McCallum, Andrew},
  booktitle = {ICML '05: Proceedings of the 22nd International Conference on Machine learning},
  doi = {10.1145/1102351.1102357},
  interhash = {25609f84a6916c1664e61d8618f46a32},
  intrahash = {2921f89f8663e7bcc122a2a77c66e7c2},
  isbn = {1-59593-180-5},
  location = {Bonn, Germany},
  pages = {41--48},
  publisher = {ACM},
  title = {Multi-way distributional clustering via pairwise interactions},
  url = {http://portal.acm.org/citation.cfm?id=1102351.1102357},
  year = {2005},
}

@comment{
  NOTE(review): the original author list of the next entry named the first
  author twice under two misspellings. The list below is the reviewer's
  reconstruction -- verify against the book's title page.
}

@book{0471988642,
  asin = {0471988642},
  author = {H{\"o}ppner, Frank and Klawonn, Frank and Kruse, Rudolf and Runkler, Thomas},
  dewey = {519.53},
  ean = {9780471988649},
  edition = {First},
  interhash = {058759c200ced027ef0c829279aa71f4},
  intrahash = {655ba43ffcd5137cdc9bff0708511116},
  isbn = {0471988642},
  publisher = {Wiley \& Sons},
  title = {Fuzzy Cluster Analysis: Methods for Classification, Data Analysis and Image Recognition},
  url = {http://www.amazon.de/Fuzzy-Cluster-Analysis-Classification-Recognition/dp/0471988642%3FSubscriptionId%3D192BW6DQ43CK9FN0ZGG2%26tag%3Dws%26linkCode%3Dxm2%26camp%3D2025%26creative%3D165953%26creativeASIN%3D0471988642},
  year = {1999},
}

@unpublished{FalBar07,
  author = {Falkowski, Tanja and Barth, Anja},
  interhash = {72bc0bbc724d035ea119f793eb04f636},
  intrahash = {754a48202afdc98227bd53128524a77f},
  note = {Presented at The 4th conference on Applications of Social Network Analysis (ASNA)},
  title = {Density-based Temporal Graph Clustering for Subgroup Detection in Social Networks},
  year = {2007},
}
@comment{
  Clustering references, continued.
  Conventions used below: one field per line, fields in alphabetical order,
  month as a three-letter macro, page ranges with a double hyphen, bare DOIs,
  and LaTeX escapes instead of raw non-ASCII characters.
  interhash/intrahash are kept verbatim; they look like export-tool
  bookkeeping fields (TODO confirm before pruning).
}

@article{green99hypertext,
  author = {Green, S. J.},
  interhash = {64375af41410dfef182148d28064dbb5},
  intrahash = {eb4a58e09c5bab13b745fa0bea1074a2},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  pages = {713--730},
  title = {Building Hypertext Links By Computing Semantic Similarity},
  volume = {11},
  year = {1999},
}

@inproceedings{schmitz2006content,
  abstract = {Recently, research projects such as PADLR and SWAP have developed tools like Edutella or Bibster, which are targeted at establishing peer-to-peer knowledge management (P2PKM) systems. In such a system, it is necessary to obtain provide brief semantic descriptions of peers, so that routing algorithms or matchmaking processes can make decisions about which communities peers should belong to, or to which peers a given query should be forwarded. This paper provides a graph clustering technique on knowledge bases for that purpose. Using this clustering, we can show that our strategy requires up to 58\% fewer queries than the baselines to yield full recall in a bibliographic P2PKM scenario.},
  address = {Berlin/Heidelberg},
  author = {Schmitz, Christoph and Hotho, Andreas and J{\"a}schke, Robert and Stumme, Gerd},
  booktitle = {The Semantic Web: Research and Applications},
  doi = {10.1007/11762256_39},
  editor = {Sure, York and Domingue, John},
  interhash = {d2ddbb8f90cd271dc18670e4c940ccfb},
  intrahash = {1788c88e04112a4491f19dfffb8dc39e},
  isbn = {978-3-540-34544-2},
  issn = {0302-9743},
  month = jun,
  pages = {530--544},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  title = {Content Aggregation on Knowledge Bases using Graph Clustering},
  url = {http://www.springerlink.com/content/u121v1827v286398/},
  volume = {4011},
  year = {2006},
}

@comment{
  The next entry was keyed keyhere, the same key as the Carpineto/Romano
  Machine Learning article earlier in this file, so BibTeX reported a
  repeated entry and dropped it. It now has its own key. It was also typed
  as an article although its DOI points into a proceedings volume.
}

@inproceedings{lozano2003acm,
  abstract = {In a previous work we have adapted the Asymmetric Clustering Model (ACM) to the domain of non-attributed graphs. We use our Comb algorithm for graph matching, a population-based method which performs multi-point explorations of the discrete space of feasible solutions. Given this algorithm we define an incremental method to obtain a prototypical graph by fusing the elements of the ensemble weighted by their prior probabilities of belonging to the class. Graph-matching and incremental fusion are integrated in an EM clustering algorithm.},
  author = {Lozano, Miguel and Escolano, Francisco},
  booktitle = {Graph Based Representations in Pattern Recognition},
  doi = {10.1007/3-540-45028-9_22},
  interhash = {438cc31be12e6e9fd11b2ba2a5cde347},
  intrahash = {c5b604392e09f7e3fbcbc98f998c7937},
  pages = {247--258},
  title = {{ACM} Attributed Graph Clustering for Learning Classes of Images},
  url = {http://dx.doi.org/10.1007/3-540-45028-9_22},
  year = {2003},
}

@inproceedings{shepitsen2008personalized,
  abstract = {Collaborative tagging applications allow Internet users to annotate resources with personalized tags. The complex network created by many annotations, often called a folksonomy, permits users the freedom to explore tags, resources or even other user's profiles unbound from a rigid predefined conceptual hierarchy. However, the freedom afforded users comes at a cost: an uncontrolled vocabulary can result in tag redundancy and ambiguity hindering navigation. Data mining techniques, such as clustering, provide a means to remedy these problems by identifying trends and reducing noise. Tag clusters can also be used as the basis for effective personalized recommendation assisting users in navigation. We present a personalization algorithm for recommendation in folksonomies which relies on hierarchical tag clusters. Our basic recommendation framework is independent of the clustering method, but we use a context-dependent variant of hierarchical agglomerative clustering which takes into account the user's current navigation context in cluster selection. We present extensive experimental results on two real world dataset. While the personalization algorithm is successful in both cases, our results suggest that folksonomies encompassing only one topic domain, rather than many topics, present an easier target for recommendation, perhaps because they are more focused and often less sparse. Furthermore, context dependent cluster selection, an integral step in our personalization algorithm, demonstrates more utility for recommendation in multi-topic folksonomies than in single-topic folksonomies. This observation suggests that topic selection is an important strategy for recommendation in multi-topic folksonomies.},
  address = {New York, NY, USA},
  author = {Shepitsen, Andriy and Gemmell, Jonathan and Mobasher, Bamshad and Burke, Robin},
  booktitle = {RecSys '08: Proceedings of the 2008 ACM conference on Recommender systems},
  doi = {10.1145/1454008.1454048},
  interhash = {c9028129dd7cd8314673bd64cbb6198e},
  intrahash = {a7552f8d8d5db4f867ae6e94e1a4442f},
  isbn = {978-1-60558-093-7},
  location = {Lausanne, Switzerland},
  pages = {259--266},
  publisher = {ACM},
  title = {Personalized recommendation in social tagging systems using hierarchical clustering},
  url = {http://portal.acm.org/citation.cfm?id=1454008.1454048},
  year = {2008},
}

@article{331504,
  abstract = {Clustering is the unsupervised classification of patterns (observations, data items, or feature vectors) into groups (clusters). The clustering problem has been addressed in many contexts and by researchers in many disciplines; this reflects its broad appeal and usefulness as one of the steps in exploratory data analysis. However, clustering is a difficult problem combinatorially, and differences in assumptions and contexts in different communities has made the transfer of useful generic concepts and methodologies slow to occur. This paper presents an overview of pattern clustering methods from a statistical pattern recognition perspective, with a goal of providing useful advice and references to fundamental concepts accessible to the broad community of clustering practitioners. We present a taxonomy of clustering techniques, and identify cross-cutting themes and recent advances. We also describe some important applications of clustering algorithms such as image segmentation, object recognition, and information retrieval.},
  address = {New York, NY, USA},
  author = {Jain, A. K. and Murty, M. N. and Flynn, P. J.},
  doi = {10.1145/331499.331504},
  interhash = {5113b61d428d4de4423182e5f2b2f468},
  intrahash = {b19bcef82a04eb82ee4abde53ee7d1c2},
  issn = {0360-0300},
  journal = {ACM Computing Surveys},
  number = {3},
  pages = {264--323},
  publisher = {ACM},
  title = {Data clustering: a review},
  url = {http://portal.acm.org/citation.cfm?id=331499.331504},
  volume = {31},
  year = {1999},
}

@article{barber2007mac,
  abstract = {The modularity of a network quantifies the extent, relative to a null model network, to which vertices cluster into community groups. We define a null model appropriate for bipartite networks, and use it to define a bipartite modularity. The bipartite modularity is presented in terms of a modularity matrix B; some key properties of the eigenspectrum of B are identified and used to describe an algorithm for identifying modules in bipartite networks. The algorithm is based on the idea that the modules in the two parts of the network are dependent, with each part mutually being used to induce the vertices for the other part into the modules. We apply the algorithm to real-world network data, showing that the algorithm successfully identifies the modular structure of bipartite networks.},
  author = {Barber, M. J.},
  doi = {10.1103/PhysRevE.76.066102},
  interhash = {e1d9f528c49b34ff4a05b2b0060bd653},
  intrahash = {61f9d5839845d5d8fa1883a46a2f7744},
  journal = {Physical Review E},
  number = {6},
  pages = {066102},
  title = {Modularity and community detection in bipartite networks},
  url = {http://arxiv.org/abs/arXiv:0707.1616},
  volume = {76},
  year = {2007},
}

@article{guimera2007mib,
  abstract = {Modularity is one of the most prominent properties of real-world complex networks. Here, we address the issue of module identification in two important classes of networks: bipartite networks and directed unipartite networks. Nodes in bipartite networks are divided into two non-overlapping sets, and the links must have one end node from each set. Directed unipartite networks only have one type of nodes, but links have an origin and an end. We show that directed unipartite networks can be conveniently represented as bipartite networks for module identification purposes. We report a novel approach especially suited for module detection in bipartite networks, and define a set of random networks that enable us to validate the new approach.},
  author = {Guimer{\`a}, R. and Sales-Pardo, M. and Amaral, L. A. N.},
  doi = {10.1103/PhysRevE.76.036102},
  interhash = {a87821c7c8e7d5ca89cb369e6215a0f3},
  intrahash = {6145a42fe04aee556fa7a68c7cea7db3},
  journal = {Physical Review E},
  number = {3},
  pages = {036102},
  publisher = {NIH Public Access},
  title = {Module identification in bipartite and directed networks},
  url = {http://arxiv.org/abs/physics/0701151},
  volume = {76},
  year = {2007},
}

@incollection{bloehdorn2006learning,
  abstract = {Recent work has shown improvements in text clustering and classification tasks by integrating conceptual features extracted from ontologies. In this paper we present text mining experiments in the medical domain in which the ontological structures used are acquired automatically in an unsupervised learning process from the text corpus in question. We compare results obtained using the automatically learned ontologies with those obtained using manually engineered ones. Our results show that both types of ontologies improve results on text clustering and classification tasks, whereby the automatically acquired ontologies yield a improvement competitive with the manually engineered ones.},
  author = {Bloehdorn, Stephan and Cimiano, Philipp and Hotho, Andreas},
  booktitle = {From Data and Information Analysis to Knowledge Engineering},
  doi = {10.1007/3-540-31314-1_40},
  interhash = {cf1af505b638677f00b3d3d7a5903199},
  intrahash = {bc1d40cf4fd64780ecf712b1e40f31de},
  isbn = {978-3-540-31313-7},
  pages = {334--341},
  publisher = {Springer Berlin Heidelberg},
  title = {Learning Ontologies to Improve Text Clustering and Classification},
  url = {http://www.kde.cs.uni-kassel.de/hotho/pub/2006/2006-03-gfkl05-bloehdorn-etal-learning-ontologies.pdf},
  year = {2006},
}

@article{1751-8121-41-22-224016,
  abstract = {We analyze CiteULike, an online collaborative tagging system where users bookmark and annotate scientific papers. Such a system can be naturally represented as a tri-partite graph whose nodes represent papers, users and tags connected by individual tag assignments. The semantics of tags is studied here, in order to uncover the hidden relationships between tags. We find that the clustering coefficient can be used to analyze the semantical patterns among tags.},
  author = {Capocci, Andrea and Caldarelli, Guido},
  interhash = {14c8ecbc38dcdae876c3f4401006e3bb},
  intrahash = {2a219a2664c566b405420f720583643a},
  journal = {Journal of Physics A: Mathematical and Theoretical},
  number = {22},
  numpages = {7},
  pages = {224016},
  title = {Folksonomies and clustering in the collaborative system {CiteULike}},
  url = {http://stacks.iop.org/1751-8121/41/224016},
  volume = {41},
  year = {2008},
}

@comment{
  The next entry is a conference paper (ICPR 1996); it was typed as an
  article with the proceedings title stored, inverted, in the journal field.
}

@inproceedings{schikuta1996grid,
  abstract = {Clustering is a common technique for the analysis of large images. In this paper a new approach to hierarchical clustering of very large data sets is presented. The GRIDCLUS algorithm uses a multidimensional grid data structure to organize the value space surrounding the pattern values, rather than to organize the patterns themselves. The patterns are grouped into blocks and clustered with respect to the blocks by a topological neighbor search algorithm. The runtime behavior of the algorithm outperforms all conventional hierarchical methods. A comparison of execution times to those of other commonly used clustering algorithms, and a heuristic runtime analysis are presented.},
  author = {Schikuta, E.},
  booktitle = {Proceedings of the 13th International Conference on Pattern Recognition},
  doi = {10.1109/ICPR.1996.546732},
  interhash = {b79b4da412028c1836f1a256c81f8244},
  intrahash = {cf76666a55d976df1b2b991e8e176a32},
  month = aug,
  pages = {101--105},
  title = {Grid-clustering: an efficient hierarchical clustering method for very large data sets},
  volume = {2},
  year = {1996},
}

@inproceedings{zhang96birch,
  author = {Zhang, Tian and Ramakrishnan, Raghu and Livny, Miron},
  booktitle = {Proceedings of the 1996 ACM SIGMOD International Conference on Management of Data (SIGMOD'96)},
  interhash = {bd3d8e33e8785ecf66408081db016ca4},
  intrahash = {d8ede3f66d485d95578bdc3eeda11fc3},
  pages = {103--114},
  title = {{BIRCH}: an efficient data clustering method for very large databases},
  url = {http://citeseer.ist.psu.edu/zhang96birch.html},
  year = {1996},
}

@comment{
  NOTE(review): the original date value (2008-07-27) of the next entry is
  presumably a record timestamp rather than the publication date, so it now
  lives in the timestamp field next to year -- confirm.
}

@inproceedings{conf/sigir/ZhangHZ08,
  author = {Zhang, Xiaodan and Hu, Xiaohua and Zhou, Xiaohua},
  booktitle = {SIGIR},
  crossref = {conf/sigir/2008},
  doi = {10.1145/1390334.1390429},
  editor = {Myaeng, Sung-Hyon and Oard, Douglas W. and Sebastiani, Fabrizio and Chua, Tat-Seng and Leong, Mun-Kew},
  ee = {http://doi.acm.org/10.1145/1390334.1390429},
  interhash = {6cd350bf63f5cd5849f3abbdb7bde184},
  intrahash = {9ce89fe2d1208b5528a9c4d472fba7ff},
  isbn = {978-1-60558-164-4},
  pages = {555--562},
  publisher = {ACM},
  timestamp = {2008-07-27},
  title = {A comparative evaluation of different link types on enhancing document clustering},
  url = {http://dblp.uni-trier.de/db/conf/sigir/sigir2008.html#ZhangHZ08},
  year = {2008},
}

@inproceedings{1281280,
  address = {New York, NY, USA},
  author = {Xu, Xiaowei and Yuruk, Nurcan and Feng, Zhidan and Schweiger, Thomas A. J.},
  booktitle = {KDD '07: Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining},
  doi = {10.1145/1281192.1281280},
  interhash = {cff0749eaf202838fd393faa1f1ea0af},
  intrahash = {8dd63b723996dfa3fdff4fcfb9e3ce2e},
  isbn = {978-1-59593-609-7},
  location = {San Jose, California, USA},
  pages = {824--833},
  publisher = {ACM},
  title = {{SCAN}: a structural clustering algorithm for networks},
  url = {http://portal.acm.org/citation.cfm?doid=1281192.1281280},
  year = {2007},
}

@inproceedings{Bickel&Scheffer04,
  author = {Bickel, S. and Scheffer, T.},
  booktitle = {Proceedings of the IEEE International Conference on Data Mining},
  interhash = {3e2001aa90bdeac09a91ee0e47552c07},
  intrahash = {d7288e68d25ddb2cf8936d14a67f0c62},
  title = {Multi-View Clustering},
  year = {2004},
}

@article{chen:058701,
  author = {Chen, Yiping and Paul, Gerald and Havlin, Shlomo and Liljeros, Fredrik and Stanley, H. Eugene},
  doi = {10.1103/PhysRevLett.101.058701},
  eid = {058701},
  interhash = {591effe237db9e7b8443c05390e5a6f4},
  intrahash = {3409d4e03990b0ff2a9704b665adf16e},
  journal = {Physical Review Letters},
  number = {5},
  numpages = {4},
  pages = {058701},
  publisher = {APS},
  title = {Finding a Better Immunization Strategy},
  url = {http://link.aps.org/abstract/PRL/v101/e058701},
  volume = {101},
  year = {2008},
}

@misc{noack08modularity,
  abstract = {Two natural and widely used representations for the community structure of networks are clusterings, which partition the vertex set into disjoint subsets, and layouts, which assign the vertices to positions in a metric space. This paper unifies prominent characterizations of layout quality and clustering quality, by showing that energy models of pairwise attraction and repulsion subsume Newman and Girvan's modularity measure. Layouts with optimal energy are relaxations of, and are thus consistent with, clusterings with optimal modularity, which is of practical relevance because both representations are complementary and often used together.},
  archiveprefix = {arXiv},
  author = {Noack, Andreas},
  eprint = {0807.4052},
  interhash = {a2442ee608964a82be06224fd90d54d3},
  intrahash = {0186031133dc122ffd6ff33ded32c911},
  title = {Modularity clustering is force-directed layout},
  url = {http://www.citebase.org/abstract?id=oai:arXiv.org:0807.4052},
  year = {2008},
}

@inproceedings{Detecting_Commmunities_via_Simultaneous_Clustering_of_Graphs_and_Folksonomies,
  author = {Java, Akshay and Joshi, Anupam and Finin, Tim},
  booktitle = {WebKDD 2008 Workshop on Web Mining and Web Usage Analysis},
  interhash = {acfec953843b168e61e2e167e29b4c3d},
  intrahash = {645abd6b3191a2a6e844d7542651ed1c},
  month = aug,
  note = {To Appear},
  title = {Detecting Commmunities via Simultaneous Clustering of Graphs and Folksonomies},
  year = {2008},
}

@comment{
  NOTE(review): the next entry looks like the same paper as
  bekkerman2005multiway (same authors, same venue) -- consider merging.
}

@inproceedings{1102357, address = {New York, NY, USA}, author = {Bekkerman, Ron and El-Yaniv, Ran and McCallum, Andrew}, booktitle = {ICML '05: Proceedings of the 22nd international 
conference on Machine learning}, doi = {http://doi.acm.org/10.1145/1102351.1102357}, interhash = {25609f84a6916c1664e61d8618f46a32}, intrahash = {a5ac489feb7407a07570f6733665a6dd}, isbn = {1-59593-180-5}, location = {Bonn, Germany}, pages = {41--48}, publisher = {ACM Press}, title = {Multi-way distributional clustering via pairwise interactions}, url = {http://www.cs.technion.ac.il/~rani/el-yaniv-papers/BekkermanEM05.pdf}, year = 2005 } @article{newman03fast, author = {Newman, M.E.J.}, interhash = {4493f03106eb8dd9db41c0ef3f667bb3}, intrahash = {56de7e6d214faebdbf2f2ef0fce09d7d}, journal = {Physical Review E}, month = {September}, title = {Fast algorithm for detecting community structure in networks}, url = {http://arxiv.org/abs/cond-mat/0309508}, volume = 69, year = 2003 } @inproceedings{xin2008www, abstract = {The success and popularity of social network systems, such as del.icio.us, Facebook, MySpace, and YouTube, have generated many interesting and challenging problems to the research community. Among others, discovering social interests shared by groups of users is very important because it helps to connect people with common interests and encourages people to contribute and share more contents. The main challenge to solving this problem comes from the diffi- culty of detecting and representing the interest of the users. The existing approaches are all based on the online connections of users and so unable to identify the common interest of users who have no online connections. In this paper, we propose a novel social interest discovery approach based on user-generated tags. Our approach is motivated by the key observation that in a social network, human users tend to use descriptive tags to annotate the contents that they are interested in. 
Our analysis on a large amount of real-world traces reveals that in general, user-generated tags are consistent with the web content they are attached to, while more concise and closer to the understanding and judgments of human users about the content. Thus, patterns of frequent co-occurrences of user tags can be used to characterize and capture topics of user interests. We have developed an Internet Social Interest Discovery system, ISID, to discover the common user interests and cluster users and their saved URLs by different interest topics. Our evaluation shows that ISID can effectively cluster similar documents by interest topics and discover user communities with common interests no matter if they have any online connections.}, author = {Li, Xin and Guo, Lei and Zhao, Yihong E.}, booktitle = {Proceedings of the 17th International World Wide Web Conference}, interhash = {d7e6a5b8d215682b2a75add69c01de29}, intrahash = {42b4c94cff05ccef031235d661a7a77a}, pages = {675-684}, publisher = {ACM}, title = {Tag-based Social Interest Discovery}, url = {http://www2008.org/papers/pdf/p675-liA.pdf}, year = 2008 } @inproceedings{cone2006, author = {Lee, Sei-Hyung and Daniels, Karen M.}, bdsk-file-1 = 
{YnBsaXN0MDDUAQIDBAUGBwpZJGFyY2hpdmVyWCR2ZXJzaW9uVCR0b3BYJG9iamVjdHNfEA9OU0tleWVkQXJjaGl2ZXISAAGGoNEICVRyb290gAGoCwwXGBkaHiVVJG51bGzTDQ4PEBMWWk5TLm9iamVjdHNXTlMua2V5c1YkY2xhc3OiERKABIAFohQVgAKAA4AHXHJlbGF0aXZlUGF0aFlhbGlhc0RhdGFfEEsuLi8uLi8uLi9QYXBlcnMvTGVlL0NvbmUgQ2×1c3RlciBMYWJlbGluZyBmb3IgU3VwcG9ydCBWZWN0b3IgQ2×1c3RlcmluZy5wZGbSGw8cHVdOUy5kYXRhTxECLgAAAAACLgACAAAJRG9jdW1lbnRzAAAAAAAAAAAAAAAAAAAAAAAAvs54rkgrAAAANyVBH0NvbmUgQ2×1c3RlciBMYWJlbGluIzJGMDk0My5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAvCUPCWn72AAAAAAAAAAAAAwADAAAJAAAAAAAAAAAAAAAAAAAAAANMZWUAABAACAAAvs5cjgAAABEACAAAwlpi1gAAAAEAFAA3JUEANxuAAACy8gAAEsYAABKtAAIATkRvY3VtZW50czpuZW1vOkRvY3VtZW50czpVbml2ZXJzaXRhOlBhcGVyczpMZWU6Q29uZSBDbHVzdGVyIExhYmVsaW4jMkYwOTQzLnBkZgAOAHAANwBDAG8AbgBlACAAQwBsAHUAcwB0AGUAcgAgAEwAYQBiAGUAbABpAG4AZwAgAGYAbwByACAAUwB1AHAAcABvAHIAdAAgAFYAZQBjAHQAbwByACAAQwBsAHUAcwB0AGUAcgBpAG4AZwAuAHAAZABmAA8AFAAJAEQAbwBjAHUAbQBlAG4AdABzABIAXS9uZW1vL0RvY3VtZW50cy9Vbml2ZXJzaXRhL1BhcGVycy9MZWUvQ29uZSBDbHVzdGVyIExhYmVsaW5nIGZvciBTdXBwb3J0IFZlY3RvciBDbHVzdGVyaW5nLnBkZgAAEwASL1ZvbHVtZXMvRG9jdW1lbnRzABUAAgAX//8AAIAG0h8gISJYJGNsYXNzZXNaJGNsYXNzbmFtZaMiIyRdTlNNdXRhYmxlRGF0YVZOU0RhdGFYTlNPYmplY3TSHyAmJ6InJFxOU0RpY3Rpb25hcnkACAARABsAJAApADIARABJAEwAUQBTAFwAYgBpAHQAfACDAIYAiACKAI0AjwCRAJMAoACqAPgA/QEFAzcDOQM+A0cDUgNWA2QDawN0A3kDfAAAAAAAAAIBAAAAAAAAACgAAAAAAAAAAAAAAAAAAAOJ}, bdsk-url-1 = {http://www.siam.org/meetings/sdm06/proceedings/046lees.pdf}, booktitle = {Proceedings of 6th SIAM Conference on Data Mining}, date-added = {2007-04-29 16:58:13 +0200}, date-modified = {2007-06-19 18:52:22 +0200}, interhash = {d3d9caa1e762806c44de928d5a24b43a}, intrahash = {76d1018ba398695e454d20de302de6e6}, month = May, pages = {484–488}, title = {Cone Cluster Labeling for Support Vector Clustering}, url = {http://www.siam.org/meetings/sdm06/proceedings/046lees.pdf}, year = 2006 } @inproceedings{1281269, address = {New York, NY, USA}, author = {Tantipathananandh, Chayant and Berger-Wolf, Tanya and Kempe, David}, 
booktitle = {KDD '07: Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining}, doi = {http://doi.acm.org/10.1145/1281192.1281269}, interhash = {9373b48866b4faa1941db0bee9265af0}, intrahash = {27a4fb58300979d4dbe94e75422418bd}, isbn = {978-1-59593-609-7}, location = {San Jose, California, USA}, pages = {717--726}, publisher = {ACM}, title = {A framework for community identification in dynamic social networks}, url = {http://portal.acm.org/citation.cfm?doid=1281192.1281269}, year = 2007 } @inproceedings{OsinskiSW04, author = {Osinski, Stanislaw and Stefanowski, Jerzy and Weiss, Dawid}, booktitle = {Intelligent Information Systems}, crossref = {ConfIis2004}, interhash = {ee4c7c8946a283da5d65103ce8f77a81}, intrahash = {40aba631c1ac8819bd64b0ee74bfdd1b}, pages = {359-368}, title = {Lingo: Search Results Clustering Algorithm Based on Singular Value Decomposition}, year = 2004 } @inproceedings{Approximating2008Java, abstract = {In many social media applications, a small fraction of the members are highly linked while most are sparsely connected to the network. Such a skewed distribution is sometimes referred to as the"long tail". Popular applications like meme trackers and content aggregators mine for information from only the popular blogs located at the head of this curve. On the other hand, the long tail contains large volumes of interesting information and niches. The question we address in this work is how best to approximate the community membership of entities in the long tail using only a small percentage of the entire graph structure. Our technique utilizes basic linear algebra manipulations and spectral methods. It has the advantage of quickly and efficiently finding a reasonable approximation of the community structure of the overall network. Such a method has significant applications in blog analysis engines as well as social media monitoring tools in general. 
}, author = {Java, Akshay and Joshi, Anupam and FininBook, Tim}, booktitle = {Proceedings of the Second International Conference on Weblogs and Social Media(ICWSM 2008)}, date = {2008 Abstract:}, interhash = {ede357e110fee8803dc181d262f30087}, intrahash = {386f36679c111f30e37ced272d5b355c}, publisher = {AAAI Press}, title = {Approximating the Community Structure of the Long Tail}, url = {http://ebiquity.umbc.edu/paper/html/id/381/Approximating-the-Community-Structure-of-the-Long-Tail}, year = 2008 } @inproceedings{baker98distributional, address = {Melbourne, AU}, author = {Baker, L. Douglas and McCallum, Andrew K.}, booktitle = {Proceedings of {SIGIR}-98, 21st {ACM} International Conference on Research and Development in Information Retrieval}, editor = {Croft, W. Bruce and Moffat, Alistair and van Rijsbergen, Cornelis J. and Wilkinson, Ross and Zobel, Justin}, interhash = {f116fa6b3ef1eefecb8bf27dfaa53ee7}, intrahash = {e472dc4e61921ed15175756fcd9fea6a}, pages = {96--103}, publisher = {ACM Press, New York, US}, title = {Distributional clustering of words for text classification}, url = {citeseer.ist.psu.edu/baker98distributional.html}, year = 1998 } @inproceedings{grahl2007clustering, abstract = {Currently, social bookmarking systems provide intuitive support for browsing locally their content. A global view is usually presented by the tag cloud of the system, but it does not allow a conceptual drill-down, e. g., along a conceptual hierarchy. In this paper, we present a clustering approach for computing such a conceptual hierarchy for a given folksonomy. The hierarchy is complemented with ranked lists of users and resources most related to each cluster. The rankings are computed using our FolkRank algorithm. 
We have evaluated our approach on large scale data from the del.icio.us bookmarking system.}, address = {Graz, Austria}, author = {Grahl, Miranda and Hotho, Andreas and Stumme, Gerd}, booktitle = {7th International Conference on Knowledge Management (I-KNOW '07)}, interhash = {5cf58d2fdd3c17f0b0c54ce098ff5b60}, intrahash = {334d3ab11400c4a3ea3ed5b1e95c1855}, issn = {0948-695x}, month = SEP, pages = {356-364}, publisher = {Know-Center}, title = {Conceptual Clustering of Social Bookmarking Sites}, vgwort = {14}, year = 2007 } @inproceedings{grahl07conceptualKdml, author = {Grahl, Miranda and Hotho, Andreas and Stumme, Gerd}, booktitle = {Workshop Proceedings of Lernen - Wissensentdeckung - Adaptivität (LWA 2007)}, editor = {Hinneburg, Alexander}, interhash = {9c3bb05456bf11bcd88a1135de51f7d9}, intrahash = {6d5188d66564fe4ed7386e28868504de}, isbn = {978-3-86010-907-6}, month = sep, pages = {50-54}, publisher = {Martin-Luther-Universität Halle-Wittenberg}, title = {Conceptual Clustering of Social Bookmark Sites}, url = {http://www.kde.cs.uni-kassel.de/hotho/pub/2007/kdml_recommender_final.pdf}, vgwort = {14}, year = 2007 } @inproceedings{658040, address = {Washington, DC, USA}, author = {Hotho, Andreas and Maedche, Alexander and Staab, Steffen}, booktitle = {ICDM '01: Proceedings of the 2001 IEEE International Conference on Data Mining}, interhash = {e2f356aeefc84fd73c9bcdc08392edf0}, intrahash = {a6803e87c5145d5f55d7bb1bab8dfd67}, isbn = {0-7695-1119-8}, pages = {607--608}, publisher = {IEEE Computer Society}, title = {Text Clustering Based on Good Aggregations}, url = {http://portal.acm.org/citation.cfm?id=658040}, year = 2001 } @inproceedings{FalBarSpi07, author = {Falkowski, Tanja and Barth, Anja and Spiliopoulou, Myra}, booktitle = {In Proc. 
of the 2007 IEEE / WIC / ACM International Conference on Web Intelligence,}, interhash = {abd9653fc405547fd263c72c5bc5ae88}, intrahash = {c0f9b82222d0c9a0b1cb0a5fa41a735a}, pages = {112-115}, title = {DENGRAPH: A Density-based Community Detection Algorithm}, url = {http://wwwiti.cs.uni-magdeburg.de/~tfalkows/publ/2007/WI_FalBarSpi07.pdf}, year = 2007 } @inproceedings{hotho_fgml02, author = {Hotho, A. and Stumme, G.}, booktitle = {Proceedings of FGML Workshop}, interhash = {3dd3d4ce38d0de0ba8e167f8133cbb3e}, intrahash = {18fdbebb76d48feccf2dceed23f4cd74}, pages = {37-45}, publisher = {Special Interest Group of German Informatics Society (FGML --- Fachgruppe Maschinelles Lernen der GI e.V.)}, title = {Conceptual Clustering of Text Clusters}, url = {\url{http://www.aifb.uni-karlsruhe.de/WBS/aho/pub/tc_fca_2002_submit.pdf}}, year = 2002 } @inproceedings{schmitz2006content, address = {Budva, Montenegro}, author = {Schmitz, Christoph and Hotho, Andreas and J\"aschke, Robert and Stumme, Gerd}, booktitle = {Proceedings of the 3rd European Semantic Web Conference}, interhash = {940fa3c671c771cc9a644b3ecfef43cd}, intrahash = {9a06428ec3bd72e3ea6c7a8f08e2bb85}, isbn = {3-540-34544-2}, month = {June}, pages = {530-544}, publisher = {Springer}, series = {LNCS}, title = {Content Aggregation on Knowledge Bases using Graph Clustering}, url = {http://www.kde.cs.uni-kassel.de/hotho/pub/2006/schmitz2006sumarize_eswc.pdf}, vgwort = {27}, volume = 4011, year = 2006 } @inproceedings{cim04c, address = {Valencia, Spain}, author = {Cimiano, Philipp and Hotho, Andreas and Staab, Steffen}, booktitle = {Proceedings of the European Conference on Artificial Intelligence (ECAI'04)}, editor = {de M{\'a}ntaras, Ramon L{\'o}pez and Saitta, Lorenza}, interhash = {5ebc73142f0c4d51a1037432435bab94}, intrahash = {48d35aa9a4d727e221c90f959462b7b2}, isbn = {1-58603-452-9}, pages = {435-439}, publisher = {IOS Press}, title = {Comparing Conceptual, Divise and Agglomerative Clustering for Learning 
Taxonomies from Text}, url = {http://www.kde.cs.uni-kassel.de/hotho/pub/2004/ecai04.pdf}, year = 2004 }