@inproceedings{stumme01conceptualclustering,
  address = {Universität Dortmund 763},
  author = {Stumme, G. and Taouil, R. and Bastide, Y. and Lakhal, L.},
  booktitle = {Proc. GI-Fachgruppentreffen Maschinelles Lernen (FGML'01)},
  editor = {Klinkenberg, R. and Rüping, S. and Fick, A. and Henze, N. and Herzog, C. and Molitor, R. and Schröder, O.},
  interhash = {c99f2ae002435208c58f9244d298a10b},
  intrahash = {f4ec21d5f63dbc213a3a6eae076c4b62},
  month = oct,
  title = {Conceptual Clustering with Iceberg Concept Lattices},
  url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2001/FGML01.pdf},
  year = {2001}
}

@book{tango2010statistical,
  abstract = {The development of powerful computing environment and the geographical information system (GIS) in recent decades has thrust the analysis of geo-referenced disease incidence data into the mainstream of spatial epidemiology. This book offers a modern perspective on statistical methods for detecting disease clustering, an indispensable procedure to find a statistical evidence on aetiology of the disease under study. With increasing public health concerns about environmental risks, the need for sophisticated methods for analyzing spatial health events is immediate.
Furthermore, the research area of statistical methods for disease clustering now attracts a wide audience due to the perceived need to implement wide-ranging monitoring systems to detect possible health-related events such as the occurrence of the severe acute respiratory syndrome (SARS), pandemic influenza and bioterrorism},
  address = {New York, NY},
  author = {Tango, Toshiro},
  edition = {1},
  format = {ebook},
  interhash = {20ae4f5773d215aded8304c02a071251},
  intrahash = {aa3f9cbf0e4ff83c7323cb2f1d7422eb},
  isbn = {1441915729},
  isbnnote = {Sekundärausgabe},
  primaryauthor = {Tango, Toshiro},
  publisher = {Springer New York},
  series = {Statistics for Biology and Health},
  shorttitle = {Statistical Methods for Disease Clustering},
  subtitle = {[Elektronische Ressource] / by Toshiro Tango},
  title = {Statistical Methods for Disease Clustering},
  titlestatement = {by Toshiro Tango},
  uniqueid = {HEB221142568},
  url = {http://scans.hebis.de/HEBCGI/show.pl?22114256_aub.html},
  year = {2010}
}

@inproceedings{zhang1996birch,
  acmid = {233324},
  address = {New York, NY, USA},
  author = {Zhang, Tian and Ramakrishnan, Raghu and Livny, Miron},
  booktitle = {Proceedings of the 1996 ACM SIGMOD International Conference on Management of Data},
  doi = {10.1145/233269.233324},
  interhash = {bd3d8e33e8785ecf66408081db016ca4},
  intrahash = {250cecc10ceecd05a96bed00b6cf0fd7},
  isbn = {0-89791-794-4},
  location = {Montreal, Quebec, Canada},
  numpages = {12},
  pages = {103--114},
  publisher = {ACM},
  series = {SIGMOD '96},
  title = {{BIRCH}: An Efficient Data Clustering Method for Very Large Databases},
  url = {http://doi.acm.org/10.1145/233269.233324},
  year = {1996}
}

@article{Jain:1999:DCR:331499.331504,
  abstract = {Clustering is the unsupervised classification of patterns (observations, data items, or feature vectors) into groups (clusters).
The clustering problem has been addressed in many contexts and by researchers in many disciplines; this reflects its broad appeal and usefulness as one of the steps in exploratory data analysis. However, clustering is a difficult problem combinatorially, and differences in assumptions and contexts in different communities has made the transfer of useful generic concepts and methodologies slow to occur. This paper presents an overview of pattern clustering methods from a statistical pattern recognition perspective, with a goal of providing useful advice and references to fundamental concepts accessible to the broad community of clustering practitioners. We present a taxonomy of clustering techniques, and identify cross-cutting themes and recent advances. We also describe some important applications of clustering algorithms such as image segmentation, object recognition, and information retrieval.},
  acmid = {331504},
  address = {New York, NY, USA},
  author = {Jain, A. K. and Murty, M. N. and Flynn, P. J.},
  doi = {10.1145/331499.331504},
  hans = {otto},
  interhash = {5113b61d428d4de4423182e5f2b2f468},
  intrahash = {bd7234f7139a1651acfaed57b5c2551f},
  issn = {0360-0300},
  issue_date = {Sept. 1999},
  journal = {ACM Comput. Surv.},
  month = sep,
  number = {3},
  numpages = {60},
  pages = {264--323},
  publisher = {ACM},
  title = {Data Clustering: A Review},
  url = {http://doi.acm.org/10.1145/331499.331504},
  volume = {31},
  year = {1999}
}

@book{DBLP:books/crc/aggarwal2013,
  bibsource = {DBLP, http://dblp.uni-trier.de},
  editor = {Aggarwal, Charu C.
and Reddy, Chandan K.},
  ee = {http://www.crcpress.com/product/isbn/9781466558212, http://www.charuaggarwal.net/clusterbook.pdf},
  interhash = {5f150f838457faaa3805b0ed034c845f},
  intrahash = {7f1541e5800e6c36c67dd6bc0ef64ba7},
  isbn = {978-1-4665-5821-2},
  publisher = {CRC Press},
  title = {Data Clustering: Algorithms and Applications},
  url = {http://www.charuaggarwal.net/clusterbook.pdf},
  year = {2014}
}

% A second entry keyed Jain:1999:DCR:331499.331504 used to sit here. It repeated
% the ACM Computing Surveys article defined earlier in this file (same key, same
% DOI, fewer fields). BibTeX reports a repeated-entry error for such a key and
% ignores the later copy, so the copy was dropped.

@article{RePEc:eee:csdana:v:41:y:2002:i:1:p:59-90,
  author = {Dhillon, Inderjit S. and Modha, Dharmendra S. and Spangler, W. Scott},
  interhash = {3ff82dddf6ce4d86909347824554ddf8},
  intrahash = {03e92f40796a0093a6e882a83f5cd995},
  journal = {Computational Statistics \& Data Analysis},
  month = nov,
  number = {1},
  pages = {59--90},
  title = {Class visualization of high-dimensional data with applications},
  url = {http://www.cs.utexas.edu/~inderjit/public_papers/csda.pdf},
  volume = {41},
  year = {2002}
}

@book{nedjah2009intelligent,
  abstract = {"Automatic Text Categorization and Clustering are becoming more and more important as the amount of text in electronic format grows and the access to it becomes more necessary and widespread. Well known applications are spam filtering and web search, but a large number of everyday uses exists (intelligent web search, data mining, law enforcement, etc.).
Currently, researchers are employing many intelligent techniques for text categorization and clustering, ranging from support vector machines and neural networks to Bayesian inference and algebraic methods, such as Latent Semantic Indexing." "This volume offers a wide spectrum of research work developed for intelligent text categorization and clustering."--Jacket.},
  address = {Berlin},
  author = {Nedjah, Nadia},
  interhash = {fe4dc424274eac3c1588fda8bfa5290a},
  intrahash = {1a61a34d4984ee4451be75902c25c49b},
  isbn = {9783540856443 3540856447 9783540856436 3540856439},
  publisher = {Springer},
  refid = {656393969},
  title = {Intelligent text categorization and clustering},
  url = {http://rave.ohiolink.edu/ebooks/ebc/9783540856443},
  year = {2009}
}

@incollection{springerlink:10.1007/978-3-540-74839-7_12,
  abstract = {Modularity is a recently introduced quality measure for graph clusterings. It has immediately received considerable attention in several disciplines, and in particular in the complex systems literature, although its properties are not well understood. We study the problem of finding clusterings with maximum modularity, thus providing theoretical foundations for past and present work based on this measure. More precisely, we prove the conjectured hardness of maximizing modularity both in the general case and with the restriction to cuts, and give an Integer Linear Programming formulation.
This is complemented by first insights into the behavior and performance of the commonly applied greedy agglomeration approach.},
  address = {Berlin / Heidelberg},
  affiliation = {Department of Computer and Information Science, University of Konstanz},
  author = {Brandes, Ulrik and Delling, Daniel and Gaertler, Marco and Görke, Robert and Hoefer, Martin and Nikoloski, Zoran and Wagner, Dorothea},
  booktitle = {Graph-Theoretic Concepts in Computer Science},
  doi = {10.1007/978-3-540-74839-7_12},
  editor = {Brandstädt, Andreas and Kratsch, Dieter and Müller, Haiko},
  interhash = {b335302041d1865d7cfec7467e8e2999},
  intrahash = {6fd10991ee4e3880c64c11862884ead7},
  isbn = {978-3-540-74838-0},
  keyword = {Computer Science},
  openurl = {http://www.blub.de},
  pages = {121--132},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  title = {On Finding Graph Clusterings with Maximum Modularity},
  url = {http://dx.doi.org/10.1007/978-3-540-74839-7_12},
  volume = {4769},
  year = {2007}
}

@article{newman2006modularity,
  abstract = {Many networks of interest in the sciences, including social networks, computer networks, and metabolic and regulatory networks, are found to divide naturally into communities or modules. The problem of detecting and characterizing this community structure is one of the outstanding issues in the study of networked systems. One highly effective approach is the optimization of the quality function known as “modularity” over the possible divisions of a network. Here I show that the modularity can be expressed in terms of the eigenvectors of a characteristic matrix for the network, which I call the modularity matrix, and that this expression leads to a spectral algorithm for community detection that returns results of demonstrably higher quality than competing methods in shorter running times. I illustrate the method with applications to several published network data sets.},
  author = {Newman, M. E.
J.},
  doi = {10.1073/pnas.0601602103},
  interhash = {e664336d414a1e21d89f30cc56f5e739},
  intrahash = {5dd9d0c2155f242393e63547d8a2347f},
  journal = {Proceedings of the National Academy of Sciences},
  number = {23},
  pages = {8577--8582},
  title = {Modularity and community structure in networks},
  volume = {103},
  year = {2006}
}

@inproceedings{schmitz2006content,
  abstract = {Recently, research projects such as PADLR and SWAP have developed tools like Edutella or Bibster, which are targeted at establishing peer-to-peer knowledge management (P2PKM) systems. In such a system, it is necessary to obtain provide brief semantic descriptions of peers, so that routing algorithms or matchmaking processes can make decisions about which communities peers should belong to, or to which peers a given query should be forwarded. This paper provides a graph clustering technique on knowledge bases for that purpose. Using this clustering, we can show that our strategy requires up to 58\% fewer queries than the baselines to yield full recall in a bibliographic P2PKM scenario.},
  address = {Heidelberg},
  author = {Schmitz, Christoph and Hotho, Andreas and Jäschke, Robert and Stumme, Gerd},
  booktitle = {The Semantic Web: Research and Applications},
  editor = {Sure, York and Domingue, John},
  interhash = {d2ddbb8f90cd271dc18670e4c940ccfb},
  intrahash = {1788c88e04112a4491f19dfffb8dc39e},
  pages = {530--544},
  publisher = {Springer},
  series = {LNAI},
  title = {Content Aggregation on Knowledge Bases using Graph Clustering},
  url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2006/schmitz2006content.pdf},
  volume = {4011},
  year = {2006}
}

@inproceedings{grahl07conceptualKdml,
  author = {Grahl, Miranda and Hotho, Andreas and Stumme, Gerd},
  booktitle = {Workshop Proceedings of Lernen -- Wissensentdeckung -- Adaptivität (LWA 2007)},
  editor = {Hinneburg, Alexander},
  interhash = {9c3bb05456bf11bcd88a1135de51f7d9},
  intrahash = {6d5188d66564fe4ed7386e28868504de},
  isbn = {978-3-86010-907-6},
  month = sep,
  pages = {50--54},
publisher = {Martin-Luther-Universität Halle-Wittenberg},
  title = {Conceptual Clustering of Social Bookmark Sites},
  url = {http://www.kde.cs.uni-kassel.de/hotho/pub/2007/kdml_recommender_final.pdf},
  vgwort = {14},
  year = {2007}
}

@inproceedings{ls_leimeister,
  address = {Valencia, Spain},
  author = {Duennebeil, S. and Sunyaev, A. and Blohm, I. and Leimeister, J. M. and Krcmar, H.},
  booktitle = {3. International Conference on Health Informatics (HealthInf) 2010},
  interhash = {c79ecc24f80f6572f79e40ef06342880},
  intrahash = {6d8d3744dda9624c4ae1b10fed7b2e3e},
  note = {163 (11-10)},
  title = {Do {German} physicians want electronic health services? {A} characterization of potential adopters and rejectors in {German} ambulatory care},
  url = {http://www.uni-kassel.de/fb7/ibwl/leimeister/pub/JML_150.pdf},
  year = {2010}
}

@inproceedings{Lu:2009:ETN:1645953.1646167,
  abstract = {In this poster, we investigate how to enhance web clustering by leveraging the tripartite network of social tagging systems. We propose a clustering method, called "Tripartite Clustering", which cluster the three types of nodes (resources, users and tags) simultaneously based on the links in the social tagging network. The proposed method is experimented on a real-world social tagging dataset sampled from del.icio.us. We also compare the proposed clustering approach with K-means. All the clustering results are evaluated against a human-maintained web directory. The experimental results show that Tripartite Clustering significantly outperforms the content-based K-means approach and achieves performance close to that of social annotation-based K-means whereas generating much more useful information.},
  acmid = {1646167},
  address = {New York, NY, USA},
  author = {Lu, Caimei and Chen, Xin and Park, E.
K.},
  booktitle = {Proceeding of the 18th ACM conference on Information and knowledge management},
  doi = {10.1145/1645953.1646167},
  interhash = {e192e53972f28d78f1ecbffbfea08bed},
  intrahash = {86160cf68758ec60922323a34a7833f0},
  isbn = {978-1-60558-512-3},
  location = {Hong Kong, China},
  numpages = {4},
  pages = {1545--1548},
  publisher = {ACM},
  series = {CIKM '09},
  title = {Exploit the tripartite network of social tagging for web clustering},
  url = {http://doi.acm.org/10.1145/1645953.1646167},
  year = {2009}
}

@article{Carpineto:2009:SWC:1541880.1541884,
  abstract = {Web clustering engines organize search results by topic, thus offering a complementary view to the flat-ranked list returned by conventional search engines. In this survey, we discuss the issues that must be addressed in the development of a Web clustering engine, including acquisition and preprocessing of search results, their clustering and visualization. Search results clustering, the core of the system, has specific requirements that cannot be addressed by classical clustering algorithms. We emphasize the role played by the quality of the cluster labels as opposed to optimizing only the clustering structure. We highlight the main characteristics of a number of existing Web clustering engines and also discuss how to evaluate their retrieval performance. Some directions for future research are finally presented.},
  acmid = {1541884},
  address = {New York, NY, USA},
  articleno = {17},
  author = {Carpineto, Claudio and Osi{\'n}ski, Stanislaw and Romano, Giovanni and Weiss, Dawid},
  doi = {10.1145/1541880.1541884},
  interhash = {95beef372c0d7c6f57caf0862896a0bb},
  intrahash = {1921bab51019d89a0b740c43d8aafd23},
  issn = {0360-0300},
  number = {3},
  issue_date = {July 2009},
  journal = {ACM Comput.
Surv.},
  month = jul,
  numpages = {38},
  pages = {17:1--17:38},
  publisher = {ACM},
  title = {A survey of {Web} clustering engines},
  url = {http://doi.acm.org/10.1145/1541880.1541884},
  volume = {41},
  year = {2009}
}

@inproceedings{conf/mldm/ToivonenVVBV01,
  author = {Toivonen, Jarmo and Visa, Ari and Vesanen, Tomi and Back, Barbro and Vanharanta, Hannu},
  booktitle = {MLDM},
  crossref = {conf/mldm/2001},
  editor = {Perner, Petra},
  ee = {http://dx.doi.org/10.1007/3-540-44596-X_15},
  interhash = {2121b03b46ecdde012bae15ca8cf8ce6},
  intrahash = {2f23db9219b4d693acf15d7401684499},
  isbn = {3-540-42359-1},
  pages = {184--195},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  title = {Validation of Text Clustering Based on Document Contents},
  url = {http://dblp.uni-trier.de/db/conf/mldm/mldm2001.html#ToivonenVVBV01},
  volume = {2123},
  year = {2001}
}

@misc{Ren2011,
  abstract = {It has been known for a long time that citation networks are always highly clustered, such as the existences of abundant triangles and high clustering coefficient. In a growth model, one typical way to produce clustering is using the triad formation mechanism. However, we find that this mechanism fails to generate enough triangles in a real-world citation network. By analyzing the network, it is found that one paper always cites papers that are already highly connected. We point out that the highly connected papers may refer to similar research topic and one subsequent paper tends to cite all of them. Based on this assumption, we propose a growth model for citation networks in which a new paper i firstly attaches to one relevant paper j and then with a probability links those papers in the same clique to which j belongs. We compare our model to two real-world citation networks - one on a special research area and the other on multidisciplinary sciences.
Results show that for the two networks the in-degree distributions are matched and the clustering features, i.e., the number of triangles and the average clustering coefficient, are well reproduced.},
  archiveprefix = {arXiv},
  author = {Ren, Fu-Xin and Cheng, Xue-Qi and Shen, Hua-Wei},
  eprint = {1104.4209},
  interhash = {2aab1505ce7da27402449873fb57b48e},
  intrahash = {d668e639ed78f4c7ec53eeba64d8ae2a},
  title = {Modeling the clustering in citation networks},
  url = {http://arxiv.org/abs/1104.4209},
  year = {2011}
}

@phdthesis{bade2009personalized,
  author = {Bade, Korinna},
  file = {bade2009personalized.pdf:bade2009personalized.pdf:PDF},
  groups = {public},
  interhash = {72c0a3797c9dbe71e9fa4778c51e653f},
  intrahash = {47cf055e43db23e10fbf5bb0a446d730},
  school = {Otto-von-Guericke-Universität Magdeburg},
  timestamp = {2011.07.29},
  title = {Personalized Hierarchical Structuring},
  username = {dbenz},
  year = {2009}
}

@incollection{radelaar2011improving,
  affiliation = {Erasmus University Rotterdam, PO Box 1738, NL-3000 Rotterdam, The Netherlands},
  author = {Radelaar, Joni and Boor, Aart-Jan and Vandic, Damir and van Dam, Jan-Willem and Hogenboom, Frederik and Frasincar, Flavius},
  booktitle = {Web Engineering},
  doi = {10.1007/978-3-642-22233-7_19},
  editor = {Auer, Sören and Díaz, Oscar and Papadopoulos, George},
  interhash = {48fe306f42bc405a5f8ae0f4a8885f3a},
  intrahash = {77bc7f7e46481b47c11dd9e53d5741e0},
  pages = {274--288},
  publisher = {Springer Berlin / Heidelberg},
  series = {Lecture Notes in Computer Science},
  title = {Improving the Exploration of Tag Spaces Using Automated Tag Clustering},
  url = {http://dx.doi.org/10.1007/978-3-642-22233-7_19},
  volume = {6757},
  year = {2011}
}

@article{Leicht08community,
  author = {Leicht, E. A. and Newman, M. E. J.},
  doi = {10.1103/PhysRevLett.100.118703},
  interhash = {825411a28bde71cda1c9087fc329d963},
  intrahash = {93726cc0540f75ee1cb515b2923d69e8},
  journal = {Phys. Rev.
Lett.},
  month = mar,
  number = {11},
  numpages = {4},
  pages = {118703},
  publisher = {American Physical Society},
  title = {Community Structure in Directed Networks},
  volume = {100},
  year = {2008}
}

@inproceedings{giannakidou2008coclustering,
  abstract = {Under social tagging systems, a typical Web 2.0 application, users label digital data sources by using freely chosen textual descriptions (tags). Poor retrieval in the aforementioned systems remains a major problem mostly due to questionable tag validity and tag ambiguity. Earlier clustering techniques have shown limited improvements, since they were based mostly on tag co-occurrences. In this paper, a co-clustering approach is employed, that exploits joint groups of related tags and social data sources, in which both social and semantic aspects of tags are considered simultaneously. Experimental results demonstrate the efficiency and the beneficial outcome of the proposed approach in correlating relevant tags and resources.},
  author = {Giannakidou, Eirini and Koutsonikola, Vassiliki A. and Vakali, Athena and Kompatsiaris, Yiannis},
  booktitle = {WAIM},
  crossref = {conf/waim/2008},
  ee = {http://dx.doi.org/10.1109/WAIM.2008.61},
  file = {giannakidou2008coclustering.pdf:giannakidou2008coclustering.pdf:PDF},
  groups = {public},
  interhash = {bf55ee73fa8e8e370cffe8ef7bb9cd60},
  intrahash = {2b24046689df977f7853b557c04689f3},
  isbn = {978-0-7695-3185-4},
  pages = {317--324},
  publisher = {IEEE},
  timestamp = {2011-02-17 11:00:40},
  title = {Co-Clustering Tags and Social Data Sources},
  url = {http://dblp.uni-trier.de/db/conf/waim/waim2008.html#GiannakidouKVK08},
  username = {dbenz},
  year = {2008}
}

@article{cilibrasi2005clustering,
  abstract = {We present a new method for clustering based on compression.
The method does not use subject-specific features or background knowledge, and works as follows: First, we determine a parameter-free, universal, similarity distance, the normalized compression distance or NCD, computed from the lengths of compressed data files (singly and in pairwise concatenation). Second, we apply a hierarchical clustering method. The NCD is not restricted to a specific application area, and works across application area boundaries. A theoretical precursor, the normalized information distance, co-developed by one of the authors, is provably optimal. However, the optimality comes at the price of using the noncomputable notion of Kolmogorov complexity. We propose axioms to capture the real-world setting, and show that the NCD approximates optimality. To extract a hierarchy of clusters from the distance matrix, we determine a dendrogram (ternary tree) by a new quartet method and a fast heuristic to implement it. The method is implemented and available as public software, and is robust under choice of different compressors. To substantiate our claims of universality and robustness, we report evidence of successful application in areas as diverse as genomics, virology, languages, literature, music, handwritten digits, astronomy, and combinations of objects from completely different domains, using statistical, dictionary, and block sorting compressors. In genomics, we presented new evidence for major questions in Mammalian evolution, based on whole-mitochondrial genomic analysis: the Eutherian orders and the Marsupionta hypothesis against the Theria hypothesis.},
  author = {Cilibrasi, R. and Vitanyi, P. M. B.},
  doi = {10.1109/TIT.2005.844059},
  interhash = {2016d3da3ebb9d17fdf0be152c2f2069},
  intrahash = {5156d51daa332b82b27cc4665dbff1f5},
  issn = {0018-9448},
  journal = {IEEE Transactions on Information Theory},
  month = apr,
  number = {4},
  pages = {1523--1545},
  title = {Clustering by compression},
  volume = {51},
  year = {2005}
}

@article{talavera2001generalitybased,
  address = {Los Alamitos, CA, USA},
  author = {Talavera, Luis and B{\'e}jar, Javier},
  doi = {10.1109/34.908969},
  interhash = {c6c47f26f4793b3fedd46209796e792c},
  intrahash = {358a8cba2b3442748874b422fb28e7f9},
  issn = {0162-8828},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  number = {2},
  pages = {196--206},
  publisher = {IEEE Computer Society},
  title = {Generality-Based Conceptual Clustering with Probabilistic Concepts},
  url = {http://www.computer.org/portal/web/csdl/doi/10.1109/34.908969},
  volume = {23},
  year = {2001}
}

@inproceedings{vinh2009information,
  abstract = {Information theoretic based measures form a fundamental class of similarity measures for comparing clusterings, beside the class of pair-counting based and set-matching based measures. In this paper, we discuss the necessity of correction for chance for information theoretic based measures for clusterings comparison. We observe that the baseline for such measures, i.e. average value between random partitions of a data set, does not take on a constant value, and tends to have larger variation when the ratio between the number of data points and the number of clusters is small. This effect is similar in some other non-information theoretic based measures such as the well-known Rand Index. Assuming a hypergeometric model of randomness, we derive the analytical formula for the expected mutual information value between a pair of clusterings, and then propose the adjusted version for several popular information theoretic based measures.
Some examples are given to demonstrate the need and usefulness of the adjusted measures.},
  address = {New York, NY, USA},
  author = {Vinh, Nguyen Xuan and Epps, Julien and Bailey, James},
  booktitle = {ICML '09: Proceedings of the 26th Annual International Conference on Machine Learning},
  doi = {10.1145/1553374.1553511},
  interhash = {ddd96b934438029873242aeabc26a201},
  intrahash = {bed9702898bc8c50faa21eabd068b8d9},
  isbn = {978-1-60558-516-1},
  location = {Montreal, Quebec, Canada},
  pages = {1073--1080},
  publisher = {ACM},
  title = {Information theoretic measures for clusterings comparison: is a correction for chance necessary?},
  url = {http://portal.acm.org/citation.cfm?id=1553511},
  year = {2009}
}

@inproceedings{shepitsen2008personalized,
  abstract = {Collaborative tagging applications allow Internet users to annotate resources with personalized tags. The complex network created by many annotations, often called a folksonomy, permits users the freedom to explore tags, resources or even other user's profiles unbound from a rigid predefined conceptual hierarchy. However, the freedom afforded users comes at a cost: an uncontrolled vocabulary can result in tag redundancy and ambiguity hindering navigation. Data mining techniques, such as clustering, provide a means to remedy these problems by identifying trends and reducing noise. Tag clusters can also be used as the basis for effective personalized recommendation assisting users in navigation. We present a personalization algorithm for recommendation in folksonomies which relies on hierarchical tag clusters. Our basic recommendation framework is independent of the clustering method, but we use a context-dependent variant of hierarchical agglomerative clustering which takes into account the user's current navigation context in cluster selection. We present extensive experimental results on two real world dataset.
While the personalization algorithm is successful in both cases, our results suggest that folksonomies encompassing only one topic domain, rather than many topics, present an easier target for recommendation, perhaps because they are more focused and often less sparse. Furthermore, context dependent cluster selection, an integral step in our personalization algorithm, demonstrates more utility for recommendation in multi-topic folksonomies than in single-topic folksonomies. This observation suggests that topic selection is an important strategy for recommendation in multi-topic folksonomies.},
  address = {New York, NY, USA},
  author = {Shepitsen, Andriy and Gemmell, Jonathan and Mobasher, Bamshad and Burke, Robin},
  booktitle = {RecSys '08: Proceedings of the 2008 ACM conference on Recommender systems},
  doi = {10.1145/1454008.1454048},
  interhash = {c9028129dd7cd8314673bd64cbb6198e},
  intrahash = {a7552f8d8d5db4f867ae6e94e1a4442f},
  isbn = {978-1-60558-093-7},
  location = {Lausanne, Switzerland},
  pages = {259--266},
  publisher = {ACM},
  title = {Personalized recommendation in social tagging systems using hierarchical clustering},
  url = {http://portal.acm.org/citation.cfm?id=1454008.1454048},
  year = {2008}
}

@inproceedings{delling2007engineering,
  author = {Delling, Daniel and Gaertler, Marco and G{\"o}rke, Robert and Wagner, Dorothea},
  booktitle = {Proceedings of the European Conference of Complex Systems (ECCS'07)},
  interhash = {b0b92b2ead46ef60435173a6fb803045},
  intrahash = {48417fa551e51439159e5fdd575825df},
  month = oct,
  note = {as poster},
  pdf = {http://i11www.ira.uka.de/algo/people/rgoerke/publications/pdf/dggw-ecgc-07_poster.pdf},
  title = {Engineering Comparators for Graph Clusterings},
  url = {http://i11www.ira.uka.de/algo/people/rgoerke/publications/pdf/dggw-ecgc-07_poster.pdf},
  year = {2007}
}

@inproceedings{delling2007engineeringa,
  author = {Delling, Daniel and Gaertler, Marco and G{\"o}rke, Robert and Nikoloski, Zoran and Wagner, Dorothea},
booktitle = {Proceedings of the European Conference of Complex Systems (ECCS'07)},
  interhash = {5c88c6ad4f9a66de094125b3ce600a55},
  intrahash = {fa6b1f4966b69da84f9582c2aba82cab},
  month = oct,
  note = {as poster},
  pdf = {http://i11www.ira.uka.de/algo/people/rgoerke/publications/pdf/dggnw-eect-07_poster.pdf},
  title = {Engineering the Evaluation of Clustering Techniques},
  url = {http://i11www.ira.uka.de/algo/people/rgoerke/publications/pdf/dggnw-eect-07_poster.pdf},
  year = {2007}
}

@inproceedings{cattuto2007emergent,
  address = {Dresden, Germany},
  author = {Cattuto, Ciro and Baldassarri, Andrea and Servedio, Vito D. P. and Loreto, Vittorio},
  booktitle = {Proceedings of the European Conference on Complex Systems},
  interhash = {9afde66e2d53e2f23bed303f7bda30af},
  intrahash = {3977cdaf1ce7a4c500ac5cfd5a91c9e5},
  month = oct,
  title = {Emergent Community Structure in Social Tagging Systems},
  year = {2007}
}

@misc{capocci2007taxonomy,
  abstract = {In this paper we investigate the nature and structure of the relation between imposed classifications and real clustering in a particular case of a scale-free network given by the on-line encyclopedia Wikipedia. We find a statistical similarity in the distributions of community sizes both by using the top-down approach of the categories division present in the archive and in the bottom-up procedure of community detection given by an algorithm based on the spectral properties of the graph. Regardless the statistically similar behaviour the two methods provide a rather different division of the articles, thereby signaling that the nature and presence of power laws is a general feature for these systems and cannot be used as a benchmark to evaluate the suitability of a clustering method.},
  author = {Capocci, A. and Rao, F.
and Caldarelli, G.},
  archiveprefix = {arXiv},
  eprint = {0710.3058},
  interhash = {df8a20aa40cce46aa0adf4f6360664dc},
  intrahash = {9c69bc97d22b7e5c2d90d8765b491a16},
  title = {Taxonomy and clustering in collaborative systems: the case of the on-line encyclopedia {Wikipedia}},
  url = {http://www.citebase.org/abstract?id=oai:arXiv.org:0710.3058},
  year = {2007}
}

@misc{newman2003structure,
  abstract = {Inspired by empirical studies of networked systems such as the Internet, social networks, and biological networks, researchers have in recent years developed a variety of techniques and models to help us understand or predict the behavior of these systems. Here we review developments in this field, including such concepts as the small-world effect, degree distributions, clustering, network correlations, random graph models, models of network growth and preferential attachment, and dynamical processes taking place on networks.},
  archiveprefix = {arXiv},
  author = {Newman, M. E. J.},
  eprint = {cond-mat/0303516},
  file = {newman2003structure.pdf:newman2003structure.pdf:PDF},
  interhash = {7bedd01cb4c06af9f5200b0fb3faa571},
  intrahash = {d53568209eef08fb0a8734cf34c59a71},
  lastdatemodified = {2006-10-07},
  lastname = {Newman},
  month = mar,
  own = {notown},
  pdf = {newman03-structure.pdf},
  read = {notread},
  title = {The structure and function of complex networks},
  url = {http://arxiv.org/abs/cond-mat/0303516},
  year = {2003}
}

@book{jain1988algorithms,
  address = {Upper Saddle River, NJ, USA},
  author = {Jain, Anil K.
and Dubes, Richard C.},
  file = {jain1988algorithms.pdf:jain1988algorithms.pdf:PDF},
  interhash = {443a79c152c5681cdc664714b50d116c},
  intrahash = {4a1adbfdc7b83b201dd8fb3e5a109609},
  lastdatemodified = {2007-03-13},
  lastname = {Jain},
  note = {Attention: PDF is rather large (\textasciitilde{}39MB)},
  own = {notown},
  pdf = {jain88_algorithms.pdf},
  publisher = {Prentice-Hall, Inc.},
  read = {notread},
  title = {Algorithms for clustering data},
  url = {http://portal.acm.org/citation.cfm?id=46712},
  year = {1988}
}

@inproceedings{brooks2006improved,
  abstract = {Tags have recently become popular as a means of annotating and organizing Web pages and blog entries. Advocates of tagging argue that the use of tags produces a 'folksonomy', a system in which the meaning of a tag is determined by its use among the community as a whole. We analyze the effectiveness of tags for classifying blog entries by gathering the top 350 tags from Technorati and measuring the similarity of all articles that share a tag. We find that tags are useful for grouping articles into broad categories, but less effective in indicating the particular content of an article. We then show that automatically extracting words deemed to be highly relevant can produce a more focused categorization of articles. We also show that clustering algorithms can be used to reconstruct a topical hierarchy among tags, and suggest that these approaches may be used to address some of the weaknesses in current tagging systems.},
  address = {New York, NY, USA},
  author = {Brooks, Christopher H.
and Montanez, Nancy}, booktitle = {WWW '06: Proceedings of the 15th international conference on World Wide Web}, file = {:brooks06-improved.pdf:PDF;brooks2006improved.pdf:brooks2006improved.pdf:PDF}, groups = {public}, interhash = {c88a665abf8d88c5a7ae95fa2783f837}, intrahash = {5c9c83e89da2faa8906a5927fe7ca3ef}, lastdatemodified = {2006-07-18}, lastname = {Brooks}, longnotes = {[[http://www2006.org/programme/files/pdf/583-slides.pdf slides]] Summary: - authors analyse the effectiveness of tags for classifying blog articles (technorati) - clustering of articles beloning to top 350 technorati tags * by tag * randomly * by related by Google News - results: * tags help to classify articles into broad categories (yet Google News performs better) * tags are not that descriptive for a specific topic of an article * automatically extracted tags (by TF/IDF) are much more descriptive for specific content - 2nd study: hierarchical clustering of articles (starting from tag clusters, i.e. all articles who share a tag) - resulting tag hierarchy comes close to e.g. Yahoo hand-built one}, own = {own}, pages = {625--632}, pdf = {brooks06-improved.pdf}, publisher = {ACM Press}, read = {read}, timestamp = {2009-09-29 16:23:07}, title = {Improved annotation of the blogosphere via autotagging and hierarchical clustering}, url = {http://www2006.org/programme/item.php?id=583}, username = {dbenz}, year = 2006 } @inproceedings{ramage2009clustering, abstract = {Automatically clustering web pages into semantic groups promises improved search and browsing on the web. In this paper, we demonstrate how user-generated tags from largescale social bookmarking websites such as del.icio.us can be used as a complementary data source to page text and anchor text for improving automatic clustering of web pages. 
This paper explores the use of tags in 1) K-means clustering in an extended vector space model that includes tags as well as page text and 2) a novel generative clustering algorithm based on latent Dirichlet allocation that jointly models text and tags. We evaluate the models by comparing their output to an established web directory. We find that the naive inclusion of tagging data improves cluster quality versus page text alone, but a more principled inclusion can substantially improve the quality of all models with a statistically significant absolute F-score increase of 4%. The generative model outperforms K-means with another 8% F-score increase.}, address = {New York, NY, USA}, author = {Ramage, Daniel and Heymann, Paul and Manning, Christopher D. and Garcia-Molina, Hector}, booktitle = {WSDM '09: Proceedings of the Second ACM International Conference on Web Search and Data Mining}, doi = {http://doi.acm.org/10.1145/1498759.1498809}, file = {ramage2009clustering.pdf:ramage2009clustering.pdf:PDF}, groups = {public}, interhash = {5595f06f88310ed67fd6fe23f813c69b}, intrahash = {75c4bad29d7eb4b34f68da27f0353516}, isbn = {978-1-60558-390-7}, location = {Barcelona, Spain}, pages = {54--63}, publisher = {ACM}, timestamp = {2009-04-24 10:19:45}, title = {Clustering the tagged web}, url = {http://portal.acm.org/citation.cfm?id=1498809}, username = {dbenz}, year = 2009 } @inproceedings{cimiano2004comparing, abstract = {The application of clustering methods for automatic taxonomy construction from text requires knowledge about the tradeoff between, (i), their effectiveness (quality of result), (ii), efficiency (run-time behaviour), and, (iii), traceability of the taxonomy construction by the ontology engineer. 
In this line, we present an original conceptual clustering method based on Formal Concept Analysis for automatic taxonomy construction and compare it with hierarchical agglomerative clustering and hierarchical divisive clustering.}, author = {Cimiano, Philipp and Hotho, Andreas and Staab, Steffen}, booktitle = {ECAI 2004 Proceedings of the 16th European Conference on Artificial Intelligence, 22 - 27 August, Valencia, Spain}, editor = {de M\'{a}ntaras, R. L\'{o}pez and Saitta, L.}, file = {cimiano2004comparing.pdf:cimiano2004comparing.pdf:PDF}, groups = {public}, interhash = {5ebc73142f0c4d51a1037432435bab94}, intrahash = {4e2f4ba3e051f120c2bc8216aad7cdaa}, pages = {435-439}, publisher = {IOS Press}, timestamp = {2011-02-02 13:38:11}, title = {Comparing Conceptual, Divise and Agglomerative Clustering for Learning Taxonomies from Text}, username = {dbenz}, year = 2004 } @inproceedings{grineva2008harnessing, abstract = {The quality of the current tagging services can be greatly improved if the service is able to cluster tags by their meaning. Tag clouds clustered by higher level topics enable the users to explore their tag space, which is especially needed when tag clouds become large. We demonstrate TagCluster - a tool for automated tag clustering that harnesses knowledge from Wikipedia about semantic relatedness between tags and names of categories to achieve smart clustering. 
Our approach shows much better quality of clusters compared to the existing techniques that rely on tag co-occurrence analysis in the tagging service.}, author = {Grineva, Maria and Grinev, Maxim and Turdakov, Denis and Velikhov, Pavel}, booktitle = {Proceedings of the International Workshop on Knowledge Acquisition from the Social Web (KASW2008)}, file = {grineva2008harnessing.pdf:grineva2008harnessing.pdf:PDF}, groups = {public}, interhash = {814ebc26a00c8facc9d2a7ef3edd256e}, intrahash = {093e8262f1cf4f2c4a159b5d7b76ce78}, timestamp = {2011-02-02 14:57:13}, title = {Harnessing Wikipedia for Smart Tags Clustering}, username = {dbenz}, year = 2008 } @article{Luo20091271, abstract = {Clustering is a very powerful data mining technique for topic discovery from text documents. The partitional clustering algorithms, such as the family of k-means, are reported performing well on document clustering. They treat the clustering problem as an optimization process of grouping documents into k clusters so that a particular criterion function is minimized or maximized. Usually, the cosine function is used to measure the similarity between two documents in the criterion function, but it may not work well when the clusters are not well separated. To solve this problem, we applied the concepts of neighbors and link, introduced in [S. Guha, R. Rastogi, K. Shim, ROCK: a robust clustering algorithm for categorical attributes, Information Systems 25 (5) (2000) 345-366], to document clustering. If two documents are similar enough, they are considered as neighbors of each other. And the link between two documents represents the number of their common neighbors. Instead of just considering the pairwise similarity, the neighbors and link involve the global information into the measurement of the closeness of two documents. 
In this paper, we propose to use the neighbors and link for the family of k-means algorithms in three aspects: a new method to select initial cluster centroids based on the ranks of candidate documents; a new similarity measure which uses a combination of the cosine and link functions; and a new heuristic function for selecting a cluster to split based on the neighbors of the cluster centroids. Our experimental results on real-life data sets demonstrated that our proposed methods can significantly improve the performance of document clustering in terms of accuracy without increasing the execution time much.}, author = {Luo, Congnan and Li, Yanjun and Chung, Soon M.}, doi = {10.1016/j.datak.2009.06.007}, interhash = {bf59c4cf26cbc35d6142630b34a66d37}, intrahash = {13483e90d8b46ef9435ec71473aacee4}, issn = {0169-023X}, journal = {Data & Knowledge Engineering}, note = {Including Special Section: Conference on Privacy in Statistical Databases (PSD 2008) - Six selected and extended papers on Database Privacy}, number = 11, pages = {1271 - 1288}, title = {Text document clustering based on neighbors}, url = {http://www.sciencedirect.com/science/article/B6TYX-4WNB4Y8-1/2/1dcd00d9c049988da53b44a526dd6555}, volume = 68, year = 2009 } @misc{Farrahi_discoveringhuman, abstract = {We present a framework to automatically discover people’s routines from information extracted by cell phones. The framework is built from a probabilistic topic model learned on novel bag type representations of activity-related cues (location, proximity and their temporal variations over a day) of peoples ’ daily routines. Using real-life data from the Reality Mining dataset, covering 68 000+ hours of human activities, we can successfully discover location-driven (from cell tower connections) and proximity-driven (from Bluetooth information) routines in an unsupervised manner. 
The resulting topics meaningfully characterize some of the underlying co-occurrence structure of the activities in the dataset, including “going to work early/late”, “being home all day”, “working constantly”, “working sporadically” and “meeting at lunch time”. 1.}, author = {Farrahi, Katayoun and Gatica-perez, Daniel}, interhash = {5e3f9c64f6fb9ba5226e3345acd3ddd8}, intrahash = {4c905f2cfc5e88c271ebc4f10d47de30}, title = {Discovering Human Routines from Cell Phone Data with Topic Models}, url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.139.5105}, year = 2010 } @inproceedings{lu2009exploit, abstract = {In this poster, we investigate how to enhance web clustering by leveraging the tripartite network of social tagging systems. We propose a clustering method, called "Tripartite Clustering", which cluster the three types of nodes (resources, users and tags) simultaneously based on the links in the social tagging network. The proposed method is experimented on a real-world social tagging dataset sampled from del.icio.us. We also compare the proposed clustering approach with K-means. All the clustering results are evaluated against a human-maintained web directory. The experimental results show that Tripartite Clustering significantly outperforms the content-based K-means approach and achieves performance close to that of social annotation-based K-means whereas generating much more useful information.}, address = {New York, NY, USA}, author = {Lu, Caimei and Chen, Xin and Park, E. 
K.}, booktitle = {CIKM '09: Proceeding of the 18th ACM conference on Information and knowledge management}, doi = {10.1145/1645953.1646167}, interhash = {e192e53972f28d78f1ecbffbfea08bed}, intrahash = {a120cece36e15b12321c87e7d0938d73}, isbn = {978-1-60558-512-3}, location = {Hong Kong, China}, pages = {1545--1548}, publisher = {ACM}, title = {Exploit the tripartite network of social tagging for web clustering}, url = {http://portal.acm.org/citation.cfm?id=1646167&dl=GUIDE&coll=GUIDE&CFID=93888742&CFTOKEN=72927742}, year = 2009 } @misc{Karypis02multilevelhypergraph, abstract = {Introduction Hypergraph partitioning is an important problem with extensive application to many areas, including VLSI design [Alpert and Kahng, 1995], efficient storage of large databases on disks [Shekhar and Liu, 1996], and data mining [Mobasher et al., 1996, Karypis et al., 1999b]. The problem is to partition the vertices of a hypergraph into k equal-size parts, such that the number of hyperedges connecting vertices in different parts is minimized. During the course of VLSI circuit design and synthesis, it is important to be able to divide the system specification into clusters so that the inter-cluster connections are minimized. This step has many applications including design packaging, HDL-based synthesis, design optimization, rapid prototyping, simulation, and testing. Many rapid prototyping systems use partitioning to map a complex circuit onto hundreds of interconnected FPGAs. Such partitioning instances are challenging because the timing, area, and I/O resource utilization }, author = {Karypis, George}, interhash = {c79f1aad4b40640a346bd67fdd4eada3}, intrahash = {e1d8b31de59731bbf41a8559c8cf9caa}, title = {Multilevel Hypergraph Partitioning}, url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.6.9117}, year = 2002 } @inproceedings{grahl2007clustering, abstract = {Currently, social bookmarking systems provide intuitive support for browsing locally their content. 
A global view is usually presented by the tag cloud of the system, but it does not allow a conceptual drill-down, e. g., along a conceptual hierarchy. In this paper, we present a clustering approach for computing such a conceptual hierarchy for a given folksonomy. The hierarchy is complemented with ranked lists of users and resources most related to each cluster. The rankings are computed using our FolkRank algorithm. We have evaluated our approach on large scale data from the del.icio.us bookmarking system.}, address = {Graz, Austria}, author = {Grahl, Miranda and Hotho, Andreas and Stumme, Gerd}, booktitle = {7th International Conference on Knowledge Management (I-KNOW '07)}, interhash = {5cf58d2fdd3c17f0b0c54ce098ff5b60}, intrahash = {334d3ab11400c4a3ea3ed5b1e95c1855}, issn = {0948-695x}, month = sep, pages = {356-364}, publisher = {Know-Center}, title = {Conceptual Clustering of Social Bookmarking Sites}, url = {/brokenurl#www.tagora-project.eu/wp-content/2007/06/grahl_iknow07.pdf}, vgwort = {14}, year = 2007 } @inproceedings{conf/icdm/TangWL09, author = {Tang, Lei and Wang, Xufei and Liu, Huan}, booktitle = {ICDM}, crossref = {conf/icdm/2009}, date = {2010-01-27}, editor = {Wang, Wei and Kargupta, Hillol and Ranka, Sanjay and Yu, Philip S. and Wu, Xindong}, ee = {http://doi.ieeecomputersociety.org/10.1109/ICDM.2009.20}, interhash = {e689d9e29e78d2c869896121ad37a772}, intrahash = {e54ea4c1636d8589d7a7d119291cb1ea}, isbn = {978-0-7695-3895-2}, pages = {503-512}, publisher = {IEEE Computer Society}, title = {Uncoverning Groups via Heterogeneous Interaction Analysis.}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.150.7384&rep=rep1&type=pdf}, year = 2009 } @inproceedings{conf/sigir/HuFCZLYC08, author = {Hu, Jian and Fang, Lujun and Cao, Yang and Zeng, Hua-Jun and Li, Hua and Yang, Qiang and Chen, Zheng}, booktitle = {SIGIR}, crossref = {conf/sigir/2008}, date = {2008-07-27}, editor = {Myaeng, Sung-Hyon and Oard, Douglas W. 
and Sebastiani, Fabrizio and Chua, Tat-Seng and Leong, Mun-Kew}, ee = {http://doi.acm.org/10.1145/1390334.1390367}, interhash = {0a2878165034dcdfacb9045608ec482a}, intrahash = {76f863a12c0b983ec67682deaec1ada4}, isbn = {978-1-60558-164-4}, pages = {179-186}, publisher = {ACM}, title = {Enhancing text clustering by leveraging Wikipedia semantics.}, url = {http://dblp.uni-trier.de/db/conf/sigir/sigir2008.html#HuFCZLYC08}, year = 2008 } @inproceedings{conf/pakdd/HuangMFW09, author = {Huang, Anna and Milne, David N. and Frank, Eibe and Witten, Ian H.}, booktitle = {PAKDD}, crossref = {conf/pakdd/2009}, date = {2009-04-25}, editor = {Theeramunkong, Thanaruk and Kijsirikul, Boonserm and Cercone, Nick and Ho, Tu Bao}, ee = {http://dx.doi.org/10.1007/978-3-642-01307-2_62}, interhash = {b2ea40479e6537693a659a4342892fee}, intrahash = {63d65d3bd978e39f33f39222be9a3f76}, isbn = {978-3-642-01306-5}, pages = {628-636}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {Clustering Documents Using a Wikipedia-Based Concept Representation.}, url = {http://dblp.uni-trier.de/db/conf/pakdd/pakdd2009.html#HuangMFW09}, volume = 5476, year = 2009 } @article{jrg1998densitybased, abstract = {The clustering algorithm DBSCAN relies on a density-based notion of clusters and is designed to discover clusters of arbitrary shape as well as to distinguish noise. In this paper, we generalize this algorithm in two important directions. The generalized algorithm—called GDBSCAN—can cluster point objects as well as spatially extended objects according to both, their spatial and their nonspatial attributes. In addition, four applications using 2D points (astronomy), 3D points (biology), 5D points (earth science) and 2D polygons (geography) are presented, demonstrating the applicability of GDBSCAN to real-world problems. 
ER -}, author = {Sander, Jörg and Ester, Martin and Kriegel, Hans-Peter and Xu, Xiaowei}, interhash = {3f2615cbf7c60d63f0a1ccc82e0caea1}, intrahash = {a15f4445f49f37f272b373c69231a590}, journal = {Data Mining and Knowledge Discovery}, month = {#jun#}, number = 2, pages = {169--194}, title = {Density-Based Clustering in Spatial Databases: The Algorithm GDBSCAN and Its Applications}, url = {http://dx.doi.org/10.1023/A:1009745219419}, volume = 2, year = 1998 } @inproceedings{Detecting_Commmunities_via_Simultaneous_Clustering_of_Graphs_and_Folksonomies, author = {Java, Akshay and Joshi, Anupam and Finin, Tim}, booktitle = {WebKDD 2008 Workshop on Web Mining and Web Usage Analysis}, interhash = {acfec953843b168e61e2e167e29b4c3d}, intrahash = {645abd6b3191a2a6e844d7542651ed1c}, month = {August}, note = {To Appear}, title = {Detecting Commmunities via Simultaneous Clustering of Graphs and Folksonomies}, year = 2008 } @inproceedings{conf/kdd/ChiSZHT07, author = {Chi, Yun and Song, Xiaodan and Zhou, Dengyong and Hino, Koji and Tseng, Belle L.}, booktitle = {KDD}, crossref = {conf/kdd/2007}, date = {2007-08-23}, editor = {Berkhin, Pavel and Caruana, Rich and Wu, Xindong}, ee = {http://doi.acm.org/10.1145/1281192.1281212}, interhash = {542ce3968b0d75048000f35669a7fb83}, intrahash = {0829ef077986e88540a96bd8ba154d86}, isbn = {978-1-59593-609-7}, pages = {153-162}, publisher = {ACM}, title = {Evolutionary spectral clustering by incorporating temporal smoothness.}, url = {http://dblp.uni-trier.de/db/conf/kdd/kdd2007.html#ChiSZHT07}, year = 2007 } @inproceedings{conf/icml/WagstaffCRS01, author = {Wagstaff, Kiri and Cardie, Claire and Rogers, Seth and Schrödl, Stefan}, booktitle = {ICML}, crossref = {conf/icml/2001}, date = {2002-11-27}, editor = {Brodley, Carla E. 
and Danyluk, Andrea Pohoreckyj}, interhash = {10d8a7c9e5b5f9cf0d0848cf8c10f604}, intrahash = {c0c3565625192ee6748b52d9d4f3b526}, isbn = {1-55860-778-1}, pages = {577-584}, publisher = {Morgan Kaufmann}, title = {Constrained K-means Clustering with Background Knowledge.}, url = {http://dblp.uni-trier.de/db/conf/icml/icml2001.html#WagstaffCRS01}, year = 2001 } @inproceedings{conf/cvpr/ShiM97, author = {Shi, Jianbo and Malik, Jitendra}, booktitle = {CVPR}, ee = {http://computer.org/proceedings/cvpr/7822/78220731abs.htm}, interhash = {600345c3af56da066873a30c9971a615}, intrahash = {bc4607ac2084911e4b1ba23b323f649a}, pages = {731-737}, title = {Normalized Cuts and Image Segmentation.}, url = {http://dblp.uni-trier.de/db/conf/cvpr/cvpr1997.html#ShiM97}, year = 1997 } @inproceedings{grahl07conceptualKdml, author = {Grahl, Miranda and Hotho, Andreas and Stumme, Gerd}, booktitle = {Workshop Proceedings of Lernen - Wissensentdeckung - Adaptivität (LWA 2007)}, editor = {Hinneburg, Alexander}, interhash = {9c3bb05456bf11bcd88a1135de51f7d9}, intrahash = {6d5188d66564fe4ed7386e28868504de}, isbn = {978-3-86010-907-6}, month = sep, pages = {50-54}, publisher = {Martin-Luther-Universität Halle-Wittenberg}, title = {Conceptual Clustering of Social Bookmark Sites}, url = {http://www.tagora-project.eu/wp-content/2007/06/grahl_iknow07.pdf }, vgwort = {14}, year = 2007 } @inproceedings{Begelman2006, address = {Edinburgh}, author = {Begelman, Grigory and Keller, Philipp and Smadja, Frank}, booktitle = {Proceedings of the WWW 2006 Workshop on Collaborative Web Tagging Workshop}, interhash = {ffacd9d40f6cba1aa8140f501c2a1802}, intrahash = {95449b3d4b12e8930d529e1e22d51e04}, month = may, pdf = {http://www.rawsugar.com/www2006/20.pdf}, timestamp = {2007.04.11}, title = {Automated Tag Clustering: Improving search and exploration in the tag space}, url = {http://www.rawsugar.com/www2006/taggingworkshopschedule.html}, year = 2006 }