Mitzlaff, F.; Atzmueller, M.; Benz, D.; Hotho, A. & Stumme, G.
(2011):
Community Assessment using Evidence Networks.
In: Analysis of Social Media and Ubiquitous Data,
[BibTeX][Endnote]
@inproceedings{mitzlaff2011community,
  author    = {Mitzlaff, Folke and Atzmueller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  title     = {Community Assessment using Evidence Networks},
  booktitle = {Analysis of Social Media and Ubiquitous Data},
  series    = {LNAI},
  volume    = {6904},
  year      = {2011},
  keywords  = {2011, community, evaluation, knowledge, mining, myown},
}
%0 = inproceedings
%A = Mitzlaff, Folke and Atzmueller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd
%B = Analysis of Social Media and Ubiquitous Data
%D = 2011
%T = Community Assessment using Evidence Networks
Leskovec, J.; Lang, K. J. & Mahoney, M. W.
(2010):
Empirical Comparison of Algorithms for Network Community Detection.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
Detecting clusters or communities in large real-world graphs such as large
social or information networks is a problem of considerable interest. In
practice, one typically chooses an objective function that captures the
intuition of a network cluster as set of nodes with better internal
connectivity than external connectivity, and then one applies approximation
algorithms or heuristics to extract sets of nodes that are related to the
objective function and that "look like" good communities for the application of
interest. In this paper, we explore a range of network community detection
methods in order to compare them and to understand their relative performance
and the systematic biases in the clusters they identify. We evaluate several
common objective functions that are used to formalize the notion of a network
community, and we examine several different classes of approximation algorithms
that aim to optimize such objective functions. In addition, rather than simply
fixing an objective and asking for an approximation to the best cluster of any
size, we consider a size-resolved version of the optimization problem.
Considering community quality as a function of its size provides a much finer
lens with which to examine community detection algorithms, since objective
functions and approximation algorithms often have non-obvious size-dependent
behavior.
@misc{Leskovec2010,
  author        = {Leskovec, Jure and Lang, Kevin J. and Mahoney, Michael W.},
  title         = {Empirical Comparison of Algorithms for Network Community Detection},
  year          = {2010},
  eprint        = {1004.3539},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1004.3539},
  keywords      = {clustering, community, comparision, empirical, evaluation, graph, toread},
  abstract      = {Detecting clusters or communities in large real-world graphs such as large
social or information networks is a problem of considerable interest. In
practice, one typically chooses an objective function that captures the
intuition of a network cluster as set of nodes with better internal
connectivity than external connectivity, and then one applies approximation
algorithms or heuristics to extract sets of nodes that are related to the
objective function and that "look like" good communities for the application of
interest. In this paper, we explore a range of network community detection
methods in order to compare them and to understand their relative performance
and the systematic biases in the clusters they identify. We evaluate several
common objective functions that are used to formalize the notion of a network
community, and we examine several different classes of approximation algorithms
that aim to optimize such objective functions. In addition, rather than simply
fixing an objective and asking for an approximation to the best cluster of any
size, we consider a size-resolved version of the optimization problem.
Considering community quality as a function of its size provides a much finer
lens with which to examine community detection algorithms, since objective
functions and approximation algorithms often have non-obvious size-dependent
behavior.},
}
%0 = misc
%A = Leskovec, Jure and Lang, Kevin J. and Mahoney, Michael W.
%B =
%C =
%D = 2010
%I =
%T = Empirical Comparison of Algorithms for Network Community Detection
%U = http://arxiv.org/abs/1004.3539
Gunawardana, A. & Shani, G.
(2009):
A Survey of Accuracy Evaluation Metrics of Recommendation Tasks. 2935
[Volltext] [BibTeX]
[Endnote]
@article{Gunawardana2935,
  author        = {Gunawardana, Asela and Shani, Guy},
  title         = {A Survey of Accuracy Evaluation Metrics of Recommendation Tasks},
  journal       = {Journal of Machine Learning Research},
  year          = {2009},
  volume        = {10},
  pages         = {2935--2962},
  url           = {http://jmlr.csail.mit.edu/papers/v10/gunawardana09a.html},
  keywords      = {evaluation, metrics, recommender, survey, toread},
  internal-note = {Entry type, journal, volume, and pages reconstructed from the JMLR v10 URL -- verify against the published paper},
}
%0 = proceedings
%A = Gunawardana, Asela and Shani, Guy
%B =
%C =
%D = 2009
%I =
%T = A Survey of Accuracy Evaluation Metrics of Recommendation Tasks
%U = http://jmlr.csail.mit.edu/papers/v10/gunawardana09a.html
Völker, J.; Vrandečić, D.; Sure, Y. & Hotho, A.
(2008):
AEON - An approach to the automatic evaluation of ontologies.
In: Applied Ontology,
Ausgabe/Number: 1-2,
Vol. 3,
Verlag/Publisher: IOS Press.
Erscheinungsjahr/Year: 2008.
Seiten/Pages: 41-62.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
OntoClean is an approach towards the formal evaluation of taxonomic relations in ontologies. The application of OntoClean consists of two main steps. First, concepts are tagged according to meta-properties known as rigidity, unity, dependency and identity. Second, the tagged concepts are checked according to predefined constraints to discover taxonomic errors. Although OntoClean is well documented in numerous publications, it is still used rather infrequently due to the high costs of application. Especially, the manual tagging of concepts with the correct meta-properties requires substantial efforts of highly experienced ontology engineers. In order to facilitate the use of OntoClean and to enable the evaluation of real-world ontologies, we provide AEON, a tool which automatically tags concepts with appropriate OntoClean meta-properties and performs the constraint checking. We use the Web as an embodiment of world knowledge, where we search for patterns that indicate how to properly tag concepts. We thoroughly evaluated our approach against a manually created gold standard. The evaluation shows the competitiveness of our approach while at the same time significantly lowering the costs. All of our results, i.e. the tool AEON as well as the experiment data, are publicly available.
@article{voelker2008aeon,
  author    = {Völker, Johanna and Vrandečić, Denny and Sure, York and Hotho, Andreas},
  title     = {AEON - An approach to the automatic evaluation of ontologies},
  journal   = {Applied Ontology},
  volume    = {3},
  number    = {1-2},
  pages     = {41--62},
  year      = {2008},
  publisher = {IOS Press},
  address   = {Amsterdam, The Netherlands, The Netherlands},
  issn      = {1570-5838},
  url       = {http://portal.acm.org/citation.cfm?id=1412422},
  keywords  = {2008, automatic, evaluation, ml, myown, ontology, sw},
  abstract  = {OntoClean is an approach towards the formal evaluation of taxonomic relations in ontologies. The application of OntoClean consists of two main steps. First, concepts are tagged according to meta-properties known as rigidity, unity, dependency and identity. Second, the tagged concepts are checked according to predefined constraints to discover taxonomic errors. Although OntoClean is well documented in numerous publications, it is still used rather infrequently due to the high costs of application. Especially, the manual tagging of concepts with the correct meta-properties requires substantial efforts of highly experienced ontology engineers. In order to facilitate the use of OntoClean and to enable the evaluation of real-world ontologies, we provide AEON, a tool which automatically tags concepts with appropriate OntoClean meta-properties and performs the constraint checking. We use the Web as an embodiment of world knowledge, where we search for patterns that indicate how to properly tag concepts. We thoroughly evaluated our approach against a manually created gold standard. The evaluation shows the competitiveness of our approach while at the same time significantly lowering the costs. All of our results, i.e. the tool AEON as well as the experiment data, are publicly available.},
}
%0 = article
%A = Völker, Johanna and Vrandečić, Denny and Sure, York and Hotho, Andreas
%C = Amsterdam, The Netherlands, The Netherlands
%D = 2008
%I = IOS Press
%T = AEON - An approach to the automatic evaluation of ontologies
%U = http://portal.acm.org/citation.cfm?id=1412422
Farooq, U.; Kannampallil, T. G.; Song, Y.; Ganoe, C. H.; Carroll, J. M. & Giles, L.
(2007):
Evaluating tagging behavior in social bookmarking systems: metrics and design heuristics.
In: GROUP '07: Proceedings of the 2007 international ACM conference on Conference on supporting group work,
New York, NY, USA.
[Volltext]
[BibTeX][Endnote]
@inproceedings{1316677,
  author    = {Farooq, Umer and Kannampallil, Thomas G. and Song, Yang and Ganoe, Craig H. and Carroll, John M. and Giles, Lee},
  title     = {Evaluating tagging behavior in social bookmarking systems: metrics and design heuristics},
  booktitle = {GROUP '07: Proceedings of the 2007 international ACM conference on Conference on supporting group work},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2007},
  pages     = {351--360},
  url       = {http://portal.acm.org/citation.cfm?id=1316677&coll=Portal&dl=GUIDE&CFID=9767993&CFTOKEN=86305662},
  doi       = {10.1145/1316624.1316677},
  isbn      = {978-1-59593-845-9},
  keywords  = {collaboration, evaluation, social, tagging, taggingsurvey, toread},
}
%0 = inproceedings
%A = Farooq, Umer and Kannampallil, Thomas G. and Song, Yang and Ganoe, Craig H. and Carroll, John M. and Giles, Lee
%B = GROUP '07: Proceedings of the 2007 international ACM conference on Conference on supporting group work
%C = New York, NY, USA
%D = 2007
%I = ACM
%T = Evaluating tagging behavior in social bookmarking systems: metrics and design heuristics
%U = http://portal.acm.org/citation.cfm?id=1316677&coll=Portal&dl=GUIDE&CFID=9767993&CFTOKEN=86305662
Wang, P.; Hu, J.; Zeng, H.-J.; Chen, L. & Chen, Z.
(2007):
Improving Text Classification by Using Encyclopedia Knowledge.
In: Data Mining, 2007. ICDM 2007. Seventh IEEE International Conference on,
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
The exponential growth of text documents available on the Internet has created an urgent need for accurate, fast, and general purpose text classification algorithms. However, the "bag of words" representation used for these classification methods is often unsatisfactory as it ignores relationships between important terms that do not co-occur literally. In order to deal with this problem, we integrate background knowledge - in our application: Wikipedia - into the process of classifying text documents. The experimental evaluation on Reuters newsfeeds and several other corpus shows that our classification results with encyclopedia knowledge are much better than the baseline "bag of words " methods.
@inproceedings{PuWang:2007,
  author    = {Wang, Pu and Hu, Jian and Zeng, Hua-Jun and Chen, Lijun and Chen, Zheng},
  title     = {Improving Text Classification by Using Encyclopedia Knowledge},
  booktitle = {Data Mining, 2007. ICDM 2007. Seventh IEEE International Conference on},
  year      = {2007},
  pages     = {332--341},
  url       = {ftp://ftp.computer.org/press/outgoing/proceedings/icdm07/Data/3018a332.pdf},
  doi       = {10.1109/ICDM.2007.77},
  isbn      = {978-0-7695-3018-5},
  keywords  = {****, classification, evaluation, learning, ol, ontology, text, wikipedia},
  abstract  = {The exponential growth of text documents available on the Internet has created an urgent need for accurate, fast, and general purpose text classification algorithms. However, the "bag of words" representation used for these classification methods is often unsatisfactory as it ignores relationships between important terms that do not co-occur literally. In order to deal with this problem, we integrate background knowledge - in our application: Wikipedia - into the process of classifying text documents. The experimental evaluation on Reuters newsfeeds and several other corpus shows that our classification results with encyclopedia knowledge are much better than the baseline "bag of words " methods.},
}
%0 = inproceedings
%A = Wang, Pu and Hu, Jian and Zeng, Hua-Jun and Chen, Lijun and Chen, Zheng
%B = Data Mining, 2007. ICDM 2007. Seventh IEEE International Conference on
%D = 2007
%T = Improving Text Classification by Using Encyclopedia Knowledge
%U = ftp://ftp.computer.org/press/outgoing/proceedings/icdm07/Data/3018a332.pdf
Dellschaft, K. & Staab, S.
(2006):
On How to Perform a Gold Standard based Evaluation of Ontology Learning.
In: Proc. of ISWC-2006 International Semantic Web Conference,
Athens, GA, USA.
[Volltext]
[BibTeX][Endnote]
@inproceedings{dellschaft2006GoldEvalOntoLearn,
  author    = {Dellschaft, Klaas and Staab, Steffen},
  title     = {On How to Perform a Gold Standard based Evaluation of Ontology Learning},
  booktitle = {Proc. of ISWC-2006 International Semantic Web Conference},
  series    = {LNCS},
  publisher = {Springer},
  address   = {Athens, GA, USA},
  year      = {2006},
  url       = {http://iswc2006.semanticweb.org/items/paper_44.php},
  keywords  = {2006, evaluation, learning, ontology, toread},
}
%0 = inproceedings
%A = Dellschaft, Klaas and Staab, Steffen
%B = In: Proc. of ISWC-2006 International Semantic Web Conference
%C = Athens, GA, USA
%D = 2006
%I = Springer, LNCS
%T = On How to Perform a Gold Standard based Evaluation of Ontology Learning
%U = http://iswc2006.semanticweb.org/items/paper_44.php
Brank, J.; Grobelnik, M. & Mladenić, D.
(2005):
A Survey of Ontology Evaluation Techniques.
In: Proc. of 8th Int. multi-conf. Information Society,
[BibTeX][Endnote]
@inproceedings{brank2005,
  author    = {Brank, Janez and Grobelnik, Marko and Mladenić, Dunja},
  title     = {A Survey of Ontology Evaluation Techniques},
  booktitle = {Proc. of 8th Int. multi-conf. Information Society},
  pages     = {166--169},
  year      = {2005},
  keywords  = {evaluation, ol, ontology, survey},
}
%0 = inproceedings
%A = Brank, Janez and Grobelnik, Marko and Mladenić, Dunja
%B = Proc. of 8th Int. multi-conf. Information Society
%D = 2005
%T = A Survey of Ontology Evaluation Techniques
Fawcett, T.
(2004):
ROC Graphs: Notes and Practical Considerations for Researchers.
[Volltext] [BibTeX]
[Endnote]
@techreport{fawcett04roc,
  author        = {Fawcett, T.},
  title         = {ROC Graphs: Notes and Practical Considerations for Researchers},
  institution   = {HP Laboratories},
  number        = {HPL-2003-4},
  year          = {2004},
  url           = {http://www.hpl.hp.com/techreports/2003/HPL-2003-4.pdf},
  keywords      = {auc, evaluation, roc, tutorial},
  internal-note = {institution and report number inferred from the hpl.hp.com techreport URL -- verify},
}
%0 = techreport
%A = Fawcett, T.
%B =
%C =
%D = 2004
%I =
%T = ROC Graphs: Notes and Practical Considerations for Researchers
%U = http://www.hpl.hp.com/techreports/2003/HPL-2003-4.pdf
Herlocker, J.; Konstan, J.; Terveen, L. & Riedl, J.
(2004):
Evaluating Collaborative Filtering Recommender
Systems.
In: ACM Transactions on Information Systems,
Ausgabe/Number: 1,
Vol. 22,
Erscheinungsjahr/Year: 2004.
Seiten/Pages: 5-53.
[BibTeX]
[Endnote]
@article{herlocker04:_evalcoll,
  author        = {Herlocker, J.L. and Konstan, J.A. and Terveen, L.G. and Riedl, J.T.},
  title         = {Evaluating Collaborative Filtering Recommender Systems},
  journal       = {ACM Transactions on Information Systems},
  year          = {2004},
  volume        = {22},
  number        = {1},
  pages         = {5--53},
  doi           = {10.1145/963770.963772},
  keywords      = {recommender, evaluation},
  internal-note = {Original entry carried doi 10.1145/223904.223929 and a book ISBN (0-201-84705-1), both inconsistent with a 2004 TOIS article; doi corrected to the TOIS 22(1) record -- verify},
}
%0 = article
%A = Herlocker, J.L. and Konstan, J.A. and Terveen, L.G. and Riedl, J.T.
%D = 2004
%T = Evaluating Collaborative Filtering Recommender
Systems
Zhao, Y. & Karypis, G.
(2004):
Empirical and Theoretical Comparisons of Selected Criterion Functions for Document Clustering.
In: Machine Learning,
Ausgabe/Number: 3,
Vol. 55,
Erscheinungsjahr/Year: 2004.
Seiten/Pages: 311-331.
[Volltext] [BibTeX]
[Endnote]
@article{journals/ml/ZhaoK04,
  author   = {Zhao, Ying and Karypis, George},
  title    = {Empirical and Theoretical Comparisons of Selected Criterion Functions for Document Clustering},
  journal  = {Machine Learning},
  year     = {2004},
  volume   = {55},
  number   = {3},
  pages    = {311--331},
  url      = {http://glaros.dtc.umn.edu/gkhome/fetch/papers/crfunML04.pdf},
  keywords = {clustering, comparisons, criterion, document, evaluation, index},
}
%0 = article
%A = Zhao, Ying and Karypis, George
%D = 2004
%T = Empirical and Theoretical Comparisons of Selected Criterion Functions for Document Clustering
%U = http://glaros.dtc.umn.edu/gkhome/fetch/papers/crfunML04.pdf
Hotho, A.; Staab, S. & Stumme, G.
(2003):
WordNet improves text document clustering.
In: Proc. of the SIGIR 2003 Semantic Web Workshop,
Toronto, Canada.
[Volltext]
[BibTeX][Endnote]
@inproceedings{hotho_sigir03,
  author    = {Hotho, A. and Staab, S. and Stumme, G.},
  title     = {WordNet improves text document clustering},
  booktitle = {Proc. of the SIGIR 2003 Semantic Web Workshop},
  address   = {Toronto, Canada},
  year      = {2003},
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/hothoetal_sigir_ws_sem_web.pdf},
  keywords  = {clustering, text, ontology, wordnet, evaluation, myown, SumSchool06, 2003},
}
%0 = inproceedings
%A = Hotho, A. and Staab, S. and Stumme, G.
%B = Proc. of the SIGIR 2003 Semantic Web Workshop
%C = Toronto, Canada
%D = 2003
%T = WordNet improves text document clustering
%U = http://www.kde.cs.uni-kassel.de/hotho/pub/hothoetal_sigir_ws_sem_web.pdf
Meila, M.
(2003):
Comparing clusterings.
In: Proc. of COLT 03,
[Volltext]
[BibTeX][Endnote]
@inproceedings{comparing2003meila,
  author        = {Meila, Marina},
  title         = {Comparing clusterings},
  booktitle     = {Proc. of COLT 03},
  year          = {2003},
  url           = {http://www.stat.washington.edu/mmp/www.stat.washington.edu/mmp/Papers/compare-colt.pdf},
  keywords      = {clustering, evaluation},
  internal-note = {url contains a duplicated path segment (www.stat.washington.edu/mmp twice) -- verify the link still resolves},
}
%0 = inproceedings
%A = Meila, Marina
%B = Proc. of COLT 03
%D = 2003
%T = Comparing clusterings
%U = http://www.stat.washington.edu/mmp/www.stat.washington.edu/mmp/Papers/compare-colt.pdf
Strehl, A. & Ghosh, J.
(2002):
Cluster Ensembles - A Knowledge Reuse Framework for Combining Multiple Partitions.
In: Journal on Machine Learning Research (JMLR),
Vol. 3,
Verlag/Publisher: MIT Press.
Erscheinungsjahr/Year: 2002.
Seiten/Pages: 583-617.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
This paper introduces the problem of combining multiple partitionings of a set of objects into a single consolidated clustering without accessing the features or algorithms that determined these partitionings. We first identify several application scenarios for the resultant 'knowledge reuse' framework that we call cluster ensembles. The cluster ensemble problem is then formalized as a combinatorial optimization problem in terms of shared mutual information. In addition to a direct maximization approach, we propose three effective and efficient techniques for obtaining high-quality combiners (consensus functions). The first combiner induces a similarity measure from the partitionings and then reclusters the objects. The second combiner is based on hypergraph partitioning. The third one collapses groups of clusters into meta-clusters which then compete for each object to determine the combined clustering. Due to the low computational costs of our techniques, it is quite feasible to use a supra-consensus function that evaluates all three approaches against the objective function and picks the best solution for a given situation. We evaluate the effectiveness of cluster ensembles in three qualitatively different application scenarios: (i) where the original clusters were formed based on non-identical sets of features, (ii) where the original clustering algorithms worked on non-identical sets of objects, and (iii) where a common data-set is used and the main purpose of combining multiple clusterings is to improve the quality and robustness of the solution. Promising results are obtained in all three situations for synthetic as well as real data-sets.
@article{stgh02b,
  author    = {Strehl, Alexander and Ghosh, Joydeep},
  title     = {Cluster Ensembles -- A Knowledge Reuse Framework for Combining Multiple Partitions},
  journal   = {Journal on Machine Learning Research (JMLR)},
  volume    = {3},
  pages     = {583--617},
  year      = {2002},
  publisher = {MIT Press},
  issn      = {1533-7928},
  url       = {http://strehl.com/download/strehl-jmlr02.pdf},
  keywords  = {clustering, evaluation, combination, ensembles},
  abstract  = {This paper introduces the problem of combining multiple partitionings of a set of objects into a single consolidated clustering without accessing the features or algorithms that determined these partitionings. We first identify several application scenarios for the resultant 'knowledge reuse' framework that we call cluster ensembles. The cluster ensemble problem is then formalized as a combinatorial optimization problem in terms of shared mutual information. In addition to a direct maximization approach, we propose three effective and efficient techniques for obtaining high-quality combiners (consensus functions). The first combiner induces a similarity measure from the partitionings and then reclusters the objects. The second combiner is based on hypergraph partitioning. The third one collapses groups of clusters into meta-clusters which then compete for each object to determine the combined clustering. Due to the low computational costs of our techniques, it is quite feasible to use a supra-consensus function that evaluates all three approaches against the objective function and picks the best solution for a given situation. We evaluate the effectiveness of cluster ensembles in three qualitatively different application scenarios: (i) where the original clusters were formed based on non-identical sets of features, (ii) where the original clustering algorithms worked on non-identical sets of objects, and (iii) where a common data-set is used and the main purpose of combining multiple clusterings is to improve the quality and robustness of the solution. Promising results are obtained in all three situations for synthetic as well as real data-sets.},
}
%0 = article
%A = Strehl, Alexander and Ghosh, Joydeep
%D = 2002
%I = MIT Press
%T = Cluster Ensembles -- A Knowledge Reuse Framework for Combining Multiple Partitions
%U = http://strehl.com/download/strehl-jmlr02.pdf
Höppner, F.; Klawonn, F.; Kruse, R. & Runkler, T. (Hrsg.)
(1999):
Fuzzy Cluster Analysis.
Erscheinungsjahr/Year: 1999.
Verlag/Publisher: John Wiley & Sons, Inc.,
[BibTeX]
[Endnote]
@book{hoeppner1999fuzzy,
  author        = {Höppner, Frank and Klawonn, Frank and Kruse, Rudolf and Runkler, Thomas},
  title         = {Fuzzy Cluster Analysis},
  publisher     = {John Wiley \& Sons, Inc.},
  year          = {1999},
  isbn          = {3-540-40317-5},
  keywords      = {clustering, evaluation, fuzzy, overview},
  internal-note = {ISBN has a Springer prefix (3-540) but the publisher is Wiley -- verify the ISBN},
}
%0 = book
%A = Höppner, Frank and Klawonn, Frank and Kruse, Rudolf and Runkler, Thomas
%D = 1999
%I = John Wiley & Sons, Inc.
%T = Fuzzy Cluster Analysis
Weingessel, A.; Dimitriadou, E. & Dolnicar, S.
(1999):
An examination of indexes for determining the number of clusters in binary data sets.
[Volltext] [BibTeX]
[Endnote]
@techreport{Weingessel99,
  author        = {Weingessel, A. and Dimitriadou, E. and Dolnicar, S.},
  title         = {An examination of indexes for determining the number of clusters in binary data sets},
  institution   = {Vienna University of Economics and Business Administration},
  type          = {Working Paper},
  number        = {29},
  year          = {1999},
  url           = {http://epub.wu-wien.ac.at/dyn/virlib/wp/showentry?ID=epub-wu-01_188},
  keywords      = {clustering, evaluation},
  internal-note = {institution inferred from the epub.wu-wien.ac.at URL -- verify},
}
%0 = techreport
%A = Weingessel, A. and Dimitriadou, E. and Dolnicar, S.
%B =
%C =
%D = 1999
%I =
%T = An examination of indexes for determining the number of clusters in binary data sets
%U = http://epub.wu-wien.ac.at/dyn/virlib/wp/showentry?ID=epub-wu-01_188
Bezdek, J. C.; Li, W. Q.; Attikiouzel, Y. & Windham, M.
(1997):
A geometric approach to cluster validity for normal mixtures.
In: Soft Computing - A Fusion of Foundations, Methodologies and Applications,
Ausgabe/Number: 4,
Vol. 1,
Erscheinungsjahr/Year: 1997.
Seiten/Pages: 166-179.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
We study indices for choosing the correct number of components in a mixture of normal distributions. Previous studies have been confined to indices based wholly on probabilistic models. Viewing mixture decomposition as probabilistic clustering (where the emphasis is on partitioning for geometric substructure) as opposed to parametric estimation enables us to introduce both fuzzy and crisp measures of cluster validity for this problem. We presume the underlying samples to be unlabeled, and use the expectation-maximization (EM) algorithm to find clusters in the data. We test 16 probabilistic, 3 fuzzy and 4 crisp indices on 12 data sets that are samples from bivariate normal mixtures having either 3 or 6 components. Over three run averages based on different initializations of EM, 10 of the 23 indices tested for choosing the right number of mixture components were correct in at least 9 of the 12 trials. Among these were the fuzzy index of Xie-Beni, the crisp Davies-Bouldin index, and two crisp indices that are recent generalizations of Dunn's index.
-
@article{bezdek1997,
  author   = {Bezdek, J. C. and Li, W. Q. and Attikiouzel, Y. and Windham, M.},
  title    = {A geometric approach to cluster validity for normal mixtures},
  journal  = {Soft Computing - A Fusion of Foundations, Methodologies and Applications},
  year     = {1997},
  volume   = {1},
  number   = {4},
  pages    = {166--179},
  url      = {http://dx.doi.org/10.1007/s005000050019},
  keywords = {cluster, evaluation, index},
  abstract = {We study indices for choosing the correct number of components in a mixture of normal distributions. Previous studies have been confined to indices based wholly on probabilistic models. Viewing mixture decomposition as probabilistic clustering (where the emphasis is on partitioning for geometric substructure) as opposed to parametric estimation enables us to introduce both fuzzy and crisp measures of cluster validity for this problem. We presume the underlying samples to be unlabeled, and use the expectation-maximization (EM) algorithm to find clusters in the data. We test 16 probabilistic, 3 fuzzy and 4 crisp indices on 12 data sets that are samples from bivariate normal mixtures having either 3 or 6 components. Over three run averages based on different initializations of EM, 10 of the 23 indices tested for choosing the right number of mixture components were correct in at least 9 of the 12 trials. Among these were the fuzzy index of Xie-Beni, the crisp Davies-Bouldin index, and two crisp indices that are recent generalizations of Dunn's index.},
}
%0 = article
%A = Bezdek, J. C. and Li, W. Q. and Attikiouzel, Y. and Windham, M.
%D = 1997
%T = A geometric approach to cluster validity for normal mixtures
%U = http://dx.doi.org/10.1007/s005000050019
Fickel, N.
(1997):
Clusteranalyse mit gemischt-skalierten Merkmalen: Abstrahierung vom Skalenniveau.
In: Allgemeines Statistisches Archiv, Vandenhoeck & Ruprecht in Göttingen,
Ausgabe/Number: 3,
Vol. 81,
Erscheinungsjahr/Year: 1997.
Seiten/Pages: 249-265.
[BibTeX]
[Endnote]
@article{fickel97,
  author   = {Fickel, N.},
  title    = {Clusteranalyse mit gemischt-skalierten Merkmalen: Abstrahierung vom Skalenniveau},
  journal  = {Allgemeines Statistisches Archiv, Vandenhoeck \& Ruprecht in Göttingen},
  year     = {1997},
  volume   = {81},
  number   = {3},
  pages    = {249--265},
  keywords = {clustering, evaluation},
}
%0 = article
%A = Fickel, N.
%D = 1997
%T = Clusteranalyse mit gemischt-skalierten Merkmalen: Abstrahierung vom Skalenniveau
Salton, G. & Buckley, C.
(1988):
On the use of spreading activation methods in automatic information.
In: SIGIR '88: Proceedings of the 11th annual international ACM SIGIR conference on Research and development in information retrieval,
New York, NY, USA.
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
Spreading activation methods have been recommended in information retrieval to expand the search vocabulary and to complement the retrieved document sets. The spreading activation strategy is reminiscent of earlier associative indexing and retrieval systems. Some spreading activation procedures are briefly described, and evaluation output is given, reflecting the effectiveness of one of the proposed procedures.
@inproceedings{salton1988spreading,
  author    = {Salton, G. and Buckley, C.},
  title     = {On the use of spreading activation methods in automatic information},
  booktitle = {SIGIR '88: Proceedings of the 11th annual international ACM SIGIR conference on Research and development in information retrieval},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  year      = {1988},
  pages     = {147--160},
  url       = {http://portal.acm.org/citation.cfm?id=62447&dl=ACM&coll=GUIDE},
  doi       = {10.1145/62437.62447},
  isbn      = {2-7061-0309-4},
  keywords  = {***, activation, evaluation, ir, msn, network, semantic, spreading},
  abstract  = {Spreading activation methods have been recommended in information retrieval to expand the search vocabulary and to complement the retrieved document sets. The spreading activation strategy is reminiscent of earlier associative indexing and retrieval systems. Some spreading activation procedures are briefly described, and evaluation output is given, reflecting the effectiveness of one of the proposed procedures.},
}
%0 = inproceedings
%A = Salton, G. and Buckley, C.
%B = SIGIR '88: Proceedings of the 11th annual international ACM SIGIR conference on Research and development in information retrieval
%C = New York, NY, USA
%D = 1988
%I = ACM Press
%T = On the use of spreading activation methods in automatic information
%U = http://portal.acm.org/citation.cfm?id=62447&dl=ACM&coll=GUIDE
Siegel, S. & Castellan, N. (Hrsg.)
(1988):
Nonparametric statistics for the behavioral sciences.
Second Aufl./Edition.
Erscheinungsjahr/Year: 1988.
Verlag/Publisher: McGraw--Hill, Inc.,
[BibTeX]
[Endnote]
@book{siegel:b88,
  author    = {Siegel, S. and Castellan, N.J.},
  title     = {Nonparametric statistics for the behavioral sciences},
  edition   = {Second},
  publisher = {McGraw--Hill, Inc.},
  year      = {1988},
  keywords  = {evaluation, statistic, test},
}
%0 = book
%A = Siegel, S. and Castellan, N.J.
%D = 1988
%I = McGraw--Hill, Inc.
%T = Nonparametric statistics for the behavioral sciences