% Bibliography database: evaluation of communities, clusterings, ontologies
% and recommender systems.
%
% Conventions used in this file:
%   - one field per line; author/title first, then venue data, then identifiers
%   - page and issue ranges use a double hyphen (e.g. 41--62)
%   - month uses the predefined three-letter macros, unquoted (jan ... dec)
%   - doi holds the bare DOI, without a resolver prefix
%   - non-ASCII letters in names are written as BibTeX special characters,
%     e.g. {\"o}, so that classic BibTeX sorts and labels them correctly
%   - interhash / intrahash are not standard fields; they are carried over
%     unchanged from the original export and are ignored by standard styles
% Citation keys are unchanged, so existing \cite commands keep working.

@inproceedings{mitzlaff2011community,
  author    = {Mitzlaff, Folke and Atzmueller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  title     = {Community Assessment using Evidence Networks},
  booktitle = {Analysis of Social Media and Ubiquitous Data},
  series    = {LNAI},
  volume    = 6904,
  year      = 2011,
  interhash = {1ef065a81ed836dfd31fcc4cd4da133b},
  intrahash = {0f45e870093c053e6f41f54c14bda46b}
}

% Journal paper (the url points at JMLR volume 10), so it is typed as an
% article; the former "series" value merely repeated the first page number.
@article{Gunawardana2935,
  author    = {Gunawardana, Asela and Shani, Guy},
  title     = {A Survey of Accuracy Evaluation Metrics of Recommendation Tasks},
  journal   = {Journal of Machine Learning Research},
  volume    = 10,
  pages     = {2935--2962},
  year      = 2009,
  url       = {http://jmlr.csail.mit.edu/papers/v10/gunawardana09a.html},
  interhash = {441df9b673faf85aecc45babd8883069},
  intrahash = {49600df05a884106989d71dedcaa7e1b}
}

@misc{Leskovec2010,
  author        = {Leskovec, Jure and Lang, Kevin J. and Mahoney, Michael W.},
  title         = {Empirical Comparison of Algorithms for Network Community Detection},
  year          = 2010,
  eprint        = {1004.3539},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1004.3539},
  abstract      = {Detecting clusters or communities in large real-world graphs such as large social or information networks is a problem of considerable interest. In practice, one typically chooses an objective function that captures the intuition of a network cluster as set of nodes with better internal connectivity than external connectivity, and then one applies approximation algorithms or heuristics to extract sets of nodes that are related to the objective function and that "look like" good communities for the application of interest. In this paper, we explore a range of network community detection methods in order to compare them and to understand their relative performance and the systematic biases in the clusters they identify. We evaluate several common objective functions that are used to formalize the notion of a network community, and we examine several different classes of approximation algorithms that aim to optimize such objective functions. In addition, rather than simply fixing an objective and asking for an approximation to the best cluster of any size, we consider a size-resolved version of the optimization problem. Considering community quality as a function of its size provides a much finer lens with which to examine community detection algorithms, since objective functions and approximation algorithms often have non-obvious size-dependent behavior.},
  interhash     = {0e58de655596b2198f4a7001facd0c32},
  intrahash     = {410a9cbea51ea5dd3c56aad26a0e11b2}
}

@inproceedings{brank2005,
  author    = {Brank, Janez and Grobelnik, Marko and Mladeni{\'c}, Dunja},
  title     = {A Survey of Ontology Evaluation Techniques},
  booktitle = {Proc. of 8th Int. multi-conf. Information Society},
  pages     = {166--169},
  year      = 2005,
  interhash = {394d7ea166cc0745dc8682a65975648c},
  intrahash = {8c910a2d3f6708b23e03e06ff843c8a8}
}

@article{voelker2008aeon,
  author    = {V{\"o}lker, Johanna and Vrande{\v{c}}i{\'c}, Denny and Sure, York and Hotho, Andreas},
  title     = {{AEON} -- An approach to the automatic evaluation of ontologies},
  journal   = {Applied Ontology},
  volume    = 3,
  number    = {1--2},
  pages     = {41--62},
  year      = 2008,
  publisher = {IOS Press},
  address   = {Amsterdam, The Netherlands},
  issn      = {1570-5838},
  url       = {http://portal.acm.org/citation.cfm?id=1412422},
  abstract  = {OntoClean is an approach towards the formal evaluation of taxonomic relations in ontologies. The application of OntoClean consists of two main steps. First, concepts are tagged according to meta-properties known as rigidity, unity, dependency and identity. Second, the tagged concepts are checked according to predefined constraints to discover taxonomic errors. Although OntoClean is well documented in numerous publications, it is still used rather infrequently due to the high costs of application. Especially, the manual tagging of concepts with the correct meta-properties requires substantial efforts of highly experienced ontology engineers. In order to facilitate the use of OntoClean and to enable the evaluation of real-world ontologies, we provide AEON, a tool which automatically tags concepts with appropriate OntoClean meta-properties and performs the constraint checking. We use the Web as an embodiment of world knowledge, where we search for patterns that indicate how to properly tag concepts. We thoroughly evaluated our approach against a manually created gold standard. The evaluation shows the competitiveness of our approach while at the same time significantly lowering the costs. All of our results, i.e. the tool AEON as well as the experiment data, are publicly available.},
  interhash = {f14794f4961d0127dc50c1938eaef7ea},
  intrahash = {f8f0bb3e3495e7627770b470d1a5f1a3}
}

@inproceedings{1316677,
  author    = {Farooq, Umer and Kannampallil, Thomas G. and Song, Yang and Ganoe, Craig H. and Carroll, John M. and Giles, Lee},
  title     = {Evaluating tagging behavior in social bookmarking systems: metrics and design heuristics},
  booktitle = {GROUP '07: Proceedings of the 2007 international ACM conference on Conference on supporting group work},
  pages     = {351--360},
  year      = 2007,
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Sanibel Island, Florida, USA},
  isbn      = {978-1-59593-845-9},
  doi       = {10.1145/1316624.1316677},
  url       = {http://portal.acm.org/citation.cfm?id=1316677&coll=Portal&dl=GUIDE&CFID=9767993&CFTOKEN=86305662},
  interhash = {66928ca91bf0d777b848fe6f7a55de20},
  intrahash = {5d0b61727d81aed019ba4297090108ca}
}

@inproceedings{PuWang:2007,
  author    = {Wang, Pu and Hu, Jian and Zeng, Hua-Jun and Chen, Lijun and Chen, Zheng},
  title     = {Improving Text Classification by Using Encyclopedia Knowledge},
  booktitle = {Seventh {IEEE} International Conference on Data Mining ({ICDM} 2007)},
  pages     = {332--341},
  year      = 2007,
  isbn      = {978-0-7695-3018-5},
  issn      = {1550-4786},
  doi       = {10.1109/ICDM.2007.77},
  url       = {ftp://ftp.computer.org/press/outgoing/proceedings/icdm07/Data/3018a332.pdf},
  abstract  = {The exponential growth of text documents available on the Internet has created an urgent need for accurate, fast, and general purpose text classification algorithms. However, the "bag of words" representation used for these classification methods is often unsatisfactory as it ignores relationships between important terms that do not co-occur literally. In order to deal with this problem, we integrate background knowledge - in our application: Wikipedia - into the process of classifying text documents. The experimental evaluation on Reuters newsfeeds and several other corpus shows that our classification results with encyclopedia knowledge are much better than the baseline "bag of words" methods.},
  interhash = {8a899b60047e20e162fc12b2ff6f8142},
  intrahash = {66058efbca5abd1222f72c32365d23fa}
}

% The report identifier lives in "number"; technical-report entries have no
% howpublished field in the standard styles.
@techreport{fawcett04roc,
  author      = {Fawcett, T.},
  title       = {{ROC} Graphs: Notes and Practical Considerations for Researchers},
  institution = {HP Laboratories},
  number      = {HPL-2003-4},
  year        = 2004,
  url         = {http://www.hpl.hp.com/techreports/2003/HPL-2003-4.pdf},
  interhash   = {aba97a4e2d1f8439ca8f129ceb05005b},
  intrahash   = {c580a50d58db5cd78d7dc5ab3cbd2a29}
}

% "timestamp" is the record date carried by the export (formerly stored in
% "date", where biblatex would have let it override the publication year).
@article{journals/ml/ZhaoK04,
  author    = {Zhao, Ying and Karypis, George},
  title     = {Empirical and Theoretical Comparisons of Selected Criterion Functions for Document Clustering},
  journal   = {Machine Learning},
  volume    = 55,
  number    = 3,
  pages     = {311--331},
  year      = 2004,
  doi       = {10.1023/B:MACH.0000027785.44527.d6},
  url       = {http://glaros.dtc.umn.edu/gkhome/fetch/papers/crfunML04.pdf},
  timestamp = {2005-12-08},
  interhash = {900e28ac463ff44cbadf055cfbcf92ea},
  intrahash = {71ea6e1192ea34ac8193867c2512927a}
}

@article{bezdek1997,
  author    = {Bezdek, J. C. and Li, W. Q. and Attikiouzel, Y. and Windham, M.},
  title     = {A geometric approach to cluster validity for normal mixtures},
  journal   = {Soft Computing - A Fusion of Foundations, Methodologies and Applications},
  volume    = 1,
  number    = 4,
  pages     = {166--179},
  month     = dec,
  year      = 1997,
  url       = {http://dx.doi.org/10.1007/s005000050019},
  abstract  = {We study indices for choosing the correct number of components in a mixture of normal distributions. Previous studies have been confined to indices based wholly on probabilistic models. Viewing mixture decomposition as probabilistic clustering (where the emphasis is on partitioning for geometric substructure) as opposed to parametric estimation enables us to introduce both fuzzy and crisp measures of cluster validity for this problem. We presume the underlying samples to be unlabeled, and use the expectation-maximization (EM) algorithm to find clusters in the data. We test 16 probabilistic, 3 fuzzy and 4 crisp indices on 12 data sets that are samples from bivariate normal mixtures having either 3 or 6 components. Over three run averages based on different initializations of EM, 10 of the 23 indices tested for choosing the right number of mixture components were correct in at least 9 of the 12 trials. Among these were the fuzzy index of Xie-Beni, the crisp Davies-Bouldin index, and two crisp indices that are recent generalizations of Dunn's index.},
  interhash = {05d873514339648dae0b6009fe1e0133},
  intrahash = {6c2a17868951a4a270f42332355d132b}
}

@inproceedings{salton1988spreading,
  author    = {Salton, G. and Buckley, C.},
  title     = {On the use of spreading activation methods in automatic information retrieval},
  booktitle = {SIGIR '88: Proceedings of the 11th annual international ACM SIGIR conference on Research and development in information retrieval},
  pages     = {147--160},
  year      = 1988,
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  location  = {Grenoble, France},
  isbn      = {2-7061-0309-4},
  doi       = {10.1145/62437.62447},
  url       = {http://portal.acm.org/citation.cfm?id=62447&dl=ACM&coll=GUIDE},
  abstract  = {Spreading activation methods have been recommended in information retrieval to expand the search vocabulary and to complement the retrieved document sets. The spreading activation strategy is reminiscent of earlier associative indexing and retrieval systems. Some spreading activation procedures are briefly described, and evaluation output is given, reflecting the effectiveness of one of the proposed procedures.},
  interhash = {7fdb31627e1a45ce109c7245fb6462b9},
  intrahash = {994aef0486e69095ee0d8ba5b3e3a91c}
}

@inproceedings{dellschaft2006GoldEvalOntoLearn,
  author    = {Dellschaft, Klaas and Staab, Steffen},
  title     = {On How to Perform a Gold Standard based Evaluation of Ontology Learning},
  booktitle = {Proc. of ISWC-2006 International Semantic Web Conference},
  series    = {LNCS},
  month     = nov,
  year      = 2006,
  publisher = {Springer},
  address   = {Athens, GA, USA},
  url       = {http://iswc2006.semanticweb.org/items/paper_44.php},
  pdf       = {http://www.uni-koblenz.de/~staab/Research/Publications/2006/DellschaftStaabISWCsubmitted.pdf},
  interhash = {bd5dcdc47711f5dce1a2546db5b66e79},
  intrahash = {0bfd502e363ef3f1523d77f972f08397}
}

@inproceedings{comparing2003meila,
  author    = {Meila, Marina},
  title     = {Comparing clusterings},
  booktitle = {Proc. of COLT 03},
  year      = 2003,
  url       = {http://www.stat.washington.edu/mmp/www.stat.washington.edu/mmp/Papers/compare-colt.pdf},
  interhash = {c445be06ea0536e64a276a4e1408356e},
  intrahash = {4cfd500d784db1a78f58e6e42d34d31a}
}

@inproceedings{hotho_sigir03,
  author    = {Hotho, A. and Staab, S. and Stumme, G.},
  title     = {{WordNet} improves text document clustering},
  booktitle = {Proc. of the SIGIR 2003 Semantic Web Workshop},
  year      = 2003,
  address   = {Toronto, Canada},
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/hothoetal_sigir_ws_sem_web.pdf},
  interhash = {c2a9a89ce20cef90a1e78d34dc2c2afe},
  intrahash = {b03e58ecb17c09f8c09d1fd93fb24f90}
}

% NOTE(review): the doi, isbn and location below do not look like data of a
% journal article (a journal issue has no ISBN or venue city) and may have been
% copied from a different, conference publication. The DOI of this TOIS article
% is presumably 10.1145/963770.963772 -- verify and replace the three fields.
@article{herlocker04:_evalcoll,
  author    = {Herlocker, J. L. and Konstan, J. A. and Terveen, L. G. and Riedl, J. T.},
  title     = {Evaluating Collaborative Filtering Recommender Systems},
  journal   = {ACM Transactions on Information Systems},
  volume    = 22,
  number    = 1,
  pages     = {5--53},
  month     = jan,
  year      = 2004,
  location  = {Denver, Colorado, United States},
  isbn      = {0-201-84705-1},
  doi       = {10.1145/223904.223929},
  interhash = {f8a70731d983634ac7105896d101c9d2},
  intrahash = {f688a96d523280b7e051648472fddd84}
}

% Publisher and city were previously embedded in the journal name.
@article{fickel97,
  author    = {Fickel, N.},
  title     = {Clusteranalyse mit gemischt-skalierten Merkmalen: Abstrahierung vom Skalenniveau},
  journal   = {Allgemeines Statistisches Archiv},
  volume    = 81,
  number    = 3,
  pages     = {249--265},
  year      = 1997,
  publisher = {Vandenhoeck \& Ruprecht},
  address   = {G{\"o}ttingen},
  interhash = {67c759fbf4e63ba4a81f807a1fa26479},
  intrahash = {b5242e619ea59d5adc362d28453c478a}
}

@article{stgh02b,
  author    = {Strehl, Alexander and Ghosh, Joydeep},
  title     = {Cluster Ensembles -- A Knowledge Reuse Framework for Combining Multiple Partitions},
  journal   = {Journal of Machine Learning Research},
  volume    = 3,
  pages     = {583--617},
  month     = dec,
  year      = 2002,
  publisher = {MIT Press},
  issn      = {1533-7928},
  url       = {http://strehl.com/download/strehl-jmlr02.pdf},
  ps        = {http://strehl.com/download/strehl-jmlr02.ps.gz},
  abstract  = {This paper introduces the problem of combining multiple partitionings of a set of objects into a single consolidated clustering without accessing the features or algorithms that determined these partitionings. We first identify several application scenarios for the resultant 'knowledge reuse' framework that we call cluster ensembles. The cluster ensemble problem is then formalized as a combinatorial optimization problem in terms of shared mutual information. In addition to a direct maximization approach, we propose three effective and efficient techniques for obtaining high-quality combiners (consensus functions). The first combiner induces a similarity measure from the partitionings and then reclusters the objects. The second combiner is based on hypergraph partitioning. The third one collapses groups of clusters into meta-clusters which then compete for each object to determine the combined clustering. Due to the low computational costs of our techniques, it is quite feasible to use a supra-consensus function that evaluates all three approaches against the objective function and picks the best solution for a given situation. We evaluate the effectiveness of cluster ensembles in three qualitatively different application scenarios: (i) where the original clusters were formed based on non-identical sets of features, (ii) where the original clustering algorithms worked on non-identical sets of objects, and (iii) where a common data-set is used and the main purpose of combining multiple clusterings is to improve the quality and robustness of the solution. Promising results are obtained in all three situations for synthetic as well as real data-sets.},
  interhash = {e911f252812b99bbec4893fa6788a05a},
  intrahash = {7fc2fdc5892130af320ac51b952149bf}
}

% NOTE(review): the ISBN prefix 3-540 is normally a Springer prefix while the
% publisher field names Wiley -- one of the two is presumably wrong; verify.
@book{hoeppner1999fuzzy,
  author    = {H{\"o}ppner, Frank and Klawonn, Frank and Kruse, Rudolf and Runkler, Thomas},
  title     = {Fuzzy Cluster Analysis},
  publisher = {John Wiley \& Sons, Inc.},
  year      = 1999,
  isbn      = {3-540-40317-5},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  interhash = {daa6dcfce2b8fd6153cc8b89b142c263},
  intrahash = {8e77172459a6abf4c50dc14a5e1e0467}
}

@book{siegel:b88,
  author    = {Siegel, S. and Castellan, N. J.},
  title     = {Nonparametric statistics for the behavioral sciences},
  edition   = {Second},
  publisher = {McGraw-Hill, Inc.},
  year      = 1988,
  interhash = {a7aa17712ec15867d7803811e5267a88},
  intrahash = {e2ac0dc902159ea0b174e65a72cb40e0}
}

% "type" overrides the default "Technical Report" label so the style prints
% "Working Paper 29".
@techreport{Weingessel99,
  author      = {Weingessel, A. and Dimitriadou, E. and Dolnicar, S.},
  title       = {An examination of indexes for determining the number of clusters in binary data sets},
  institution = {SFB ``Adaptive Information Systems and Modeling in Economics and Management Science''},
  type        = {Working Paper},
  number      = {29},
  year        = 1999,
  url         = {http://epub.wu-wien.ac.at/dyn/virlib/wp/showentry?ID=epub-wu-01_188},
  interhash   = {21aff217b0b88e0b7c9aa7117fdf91c1},
  intrahash   = {8d0a369818293ea71ff632882b988b01}
}