% Bibliography database (BibSonomy-style export). The interhash, intrahash,
% groups, username, timestamp, file and journalpub fields are export metadata
% that standard styles ignore; they are kept unchanged.

% Conference paper published in an LNCS volume (series + volume below).
% Accented letters use LaTeX escapes so classic 8-bit BibTeX sorts them correctly.
@inproceedings{illig2011comparison,
  abstract  = {Recommendation algorithms and multi-class classifiers can support users of social bookmarking systems in assigning tags to their bookmarks. Content based recommenders are the usual approach for facing the cold start problem, i.e., when a bookmark is uploaded for the first time and no information from other users can be exploited. In this paper, we evaluate several recommendation algorithms in a cold-start scenario on a large real-world dataset.},
  address   = {Berlin/Heidelberg},
  author    = {Illig, Jens and Hotho, Andreas and J{\"a}schke, Robert and Stumme, Gerd},
  booktitle = {Knowledge Processing and Data Analysis},
  doi       = {10.1007/978-3-642-22140-8_9},
  editor    = {Wolff, Karl Erich and Palchunov, Dmitry E. and Zagoruiko, Nikolay G. and Andelfinger, Urs},
  interhash = {cd3420c0f73761453320dc528b3d1e14},
  intrahash = {f9d6e06ab0f2fdcebb77afa97d72e40a},
  isbn      = {978-3-642-22139-2},
  pages     = {136--149},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  title     = {A Comparison of Content-Based Tag Recommendations in Folksonomy Systems},
  url       = {http://dx.doi.org/10.1007/978-3-642-22140-8_9},
  volume    = {6581},
  year      = {2011},
}

% Conference paper; address is the publisher's city, location is the
% conference venue as given in the exported record.
@inproceedings{vinh2009information,
  abstract  = {Information theoretic based measures form a fundamental class of similarity measures for comparing clusterings, beside the class of pair-counting based and set-matching based measures. In this paper, we discuss the necessity of correction for chance for information theoretic based measures for clusterings comparison. We observe that the baseline for such measures, i.e. average value between random partitions of a data set, does not take on a constant value, and tends to have larger variation when the ratio between the number of data points and the number of clusters is small. This effect is similar in some other non-information theoretic based measures such as the well-known Rand Index. Assuming a hypergeometric model of randomness, we derive the analytical formula for the expected mutual information value between a pair of clusterings, and then propose the adjusted version for several popular information theoretic based measures. Some examples are given to demonstrate the need and usefulness of the adjusted measures.},
  address   = {New York, NY, USA},
  author    = {Vinh, Nguyen Xuan and Epps, Julien and Bailey, James},
  booktitle = {ICML '09: Proceedings of the 26th Annual International Conference on Machine Learning},
  doi       = {10.1145/1553374.1553511},
  interhash = {ddd96b934438029873242aeabc26a201},
  intrahash = {bed9702898bc8c50faa21eabd068b8d9},
  isbn      = {978-1-60558-516-1},
  location  = {Montreal, Quebec, Canada},
  pages     = {1073--1080},
  publisher = {ACM},
  title     = {Information theoretic measures for clusterings comparison: is a correction for chance necessary?},
  url       = {http://portal.acm.org/citation.cfm?id=1553511},
  year      = {2009},
}

% Journal article. pages holds the six-digit article number; it is stored as a
% braced string because a bare integer drops the leading zero.
% NOTE(review): doi was derived from volume and article number - confirm it resolves.
@article{newman2003why,
  author    = {Newman, M. E. J. and Park, J.},
  doi       = {10.1103/PhysRevE.68.036122},
  interhash = {c074e9640dd0a12bdcb5165afcab5981},
  intrahash = {63485bb49644f266dec9a4fddf447fb9},
  journal   = {Physical Review E},
  number    = {3},
  pages     = {036122},
  publisher = {APS},
  title     = {Why social networks are different from other types of networks},
  url       = {http://scholar.google.de/scholar.bib?q=info:Ep_ADiiiopYJ:scholar.google.com/&output=citation&hl=de&as_sdt=2000&ct=citation&cd=0},
  volume    = {68},
  year      = {2003},
}

% Preprint. The arXiv identifier in eprint is the one embedded in the url field.
@misc{capocci2007taxonomy,
  abstract      = {In this paper we investigate the nature and structure of the relation between imposed classifications and real clustering in a particular case of a scale-free network given by the on-line encyclopedia Wikipedia. We find a statistical similarity in the distributions of community sizes both by using the top-down approach of the categories division present in the archive and in the bottom-up procedure of community detection given by an algorithm based on the spectral properties of the graph. Regardless the statistically similar behaviour the two methods provide a rather different division of the articles, thereby signaling that the nature and presence of power laws is a general feature for these systems and cannot be used as a benchmark to evaluate the suitability of a clustering method.},
  archiveprefix = {arXiv},
  author        = {Capocci, A. and Rao, F. and Caldarelli, G.},
  eprint        = {0710.3058},
  interhash     = {df8a20aa40cce46aa0adf4f6360664dc},
  intrahash     = {9c69bc97d22b7e5c2d90d8765b491a16},
  title         = {Taxonomy and clustering in collaborative systems: the case of the on-line encyclopedia {Wikipedia}},
  url           = {http://www.citebase.org/abstract?id=oai:arXiv.org:0710.3058},
  year          = {2007},
}

% Journal article. The exported record carried the template placeholder
% "Number" in the number field; it is recorded in internal-note (a field
% styles ignore) until the real issue number is looked up.
@article{alkhalifa2006folksonomies,
  abstract      = {Semantic Metadata, which describes the meaning of documents, can be produced either manually or else semi-automatically using information extraction techniques. Manual techniques are expensive if they rely on skilled cataloguers, but a possible alternative is to make use of community produced annotations such as those collected in folksonomies. This paper reports on an experiment that we carried out to validate the assumption that folksonomies contain higher semantic value than keywords extracted by machines. The experiment has been carried-out in two ways: subjectively, by asking a human indexer to evaluate the quality of the generated keywords from both systems; and automatically, by measuring the percentage of overlap between the folksonomy set and machine generated keywords set. The result of the experiment can be considered as evidence for the rich semantics of folksonomies, demonstrating that folksonomies used in the del.icio.us bookmarking service can be used in the process of generating semantic metadata to annotate web resources.},
  author        = {Al-Khalifa, Hend S. and Davis, Hugh C.},
  editor        = {Isa{\'i}as, Pedro and Paprzycki, Marcin},
  file          = {alkhalifa2006folksonomies.pdf:alkhalifa2006folksonomies.pdf:PDF},
  groups        = {public},
  interhash     = {8b4479da8117e327caa5c3bc91c4c1b7},
  internal-note = {number field held the placeholder "Number" in the exported record; issue number still to be verified},
  intrahash     = {feb0b9487f8a40c60fd62dd440ea318c},
  journal       = {{IADIS} International Journal on Computer Science and Information Systems ({IJCSIS})},
  journalpub    = {1},
  month         = oct,
  pages         = {132--143},
  publisher     = {IADIS -- International Association for Development of the Information Society},
  timestamp     = {2011-02-02 14:15:35},
  title         = {Folksonomies versus Automatic Keyword Extraction: An Empirical Study},
  url           = {http://eprints.ecs.soton.ac.uk/13155/},
  username      = {dbenz},
  volume        = {1},
  year          = {2006},
}