@article{stgh02b,
  abstract  = {This paper introduces the problem of combining multiple partitionings of a set of objects into a single consolidated clustering without accessing the features or algorithms that determined these partitionings. We first identify several application scenarios for the resultant 'knowledge reuse' framework that we call cluster ensembles. The cluster ensemble problem is then formalized as a combinatorial optimization problem in terms of shared mutual information. In addition to a direct maximization approach, we propose three effective and efficient techniques for obtaining high-quality combiners (consensus functions). The first combiner induces a similarity measure from the partitionings and then reclusters the objects. The second combiner is based on hypergraph partitioning. The third one collapses groups of clusters into meta-clusters which then compete for each object to determine the combined clustering. Due to the low computational costs of our techniques, it is quite feasible to use a supra-consensus function that evaluates all three approaches against the objective function and picks the best solution for a given situation. We evaluate the effectiveness of cluster ensembles in three qualitatively different application scenarios: (i) where the original clusters were formed based on non-identical sets of features, (ii) where the original clustering algorithms worked on non-identical sets of objects, and (iii) where a common data-set is used and the main purpose of combining multiple clusterings is to improve the quality and robustness of the solution. Promising results are obtained in all three situations for synthetic as well as real data-sets.},
  author    = {Strehl, Alexander and Ghosh, Joydeep},
  interhash = {e911f252812b99bbec4893fa6788a05a},
  intrahash = {7fc2fdc5892130af320ac51b952149bf},
  issn      = {1533-7928},
  journal   = {Journal of Machine Learning Research},
  month     = {December},
  pages     = {583--617},
  ps        = {http://strehl.com/download/strehl-jmlr02.ps.gz},
  publisher = {MIT Press},
  title     = {Cluster Ensembles -- A Knowledge Reuse Framework for Combining Multiple Partitions},
  url       = {http://strehl.com/download/strehl-jmlr02.pdf},
  volume    = {3},
  year      = {2002}
}

@article{wu2008wu,
  abstract  = {This paper presents the top 10 data mining algorithms identified by the IEEE International Conference on Data Mining (ICDM) in December 2006: C4.5, k-Means, SVM, Apriori, EM, PageRank, AdaBoost, kNN, Naive Bayes, and CART. These top 10 algorithms are among the most influential data mining algorithms in the research community. With each algorithm, we provide a description of the algorithm, discuss the impact of the algorithm, and review current and further research on the algorithm. These 10 algorithms cover classification, clustering, statistical learning, association analysis, and link mining, which are all among the most important topics in data mining research and development.},
  address   = {London},
  author    = {Wu, Xindong and Kumar, Vipin and Quinlan, J. Ross and Ghosh, Joydeep and Yang, Qiang and Motoda, Hiroshi and McLachlan, Geoffrey and Ng, Angus and Liu, Bing and Yu, Philip and Zhou, Zhi-Hua and Steinbach, Michael and Hand, David and Steinberg, Dan},
  interhash = {76fd294a34cf85638f6e194a85af8db9},
  intrahash = {2c34bb4b49187a6d3e780e78d254ae1f},
  issn      = {0219-1377},
  journal   = {Knowledge and Information Systems},
  month     = {January},
  number    = {1},
  pages     = {1--37},
  publisher = {Springer},
  title     = {Top 10 algorithms in data mining},
  url       = {http://dx.doi.org/10.1007/s10115-007-0114-2},
  volume    = {14},
  year      = {2008}
}

@article{journals/jmlr/BanerjeeMDG05,
  author    = {Banerjee, Arindam and Merugu, Srujana and Dhillon, Inderjit S. and Ghosh, Joydeep},
  ee        = {http://www.jmlr.org/papers/v6/banerjee05b.html},
  interhash = {50d46127d134382ca84699ce24171c3f},
  intrahash = {bba5d5241acf3ec9eea3f869a832c629},
  journal   = {Journal of Machine Learning Research},
  pages     = {1705--1749},
  title     = {Clustering with Bregman Divergences},
  url       = {http://dblp.uni-trier.de/db/journals/jmlr/jmlr6.html#BanerjeeMDG05},
  volume    = {6},
  year      = {2005}
}