% Journal article: AI Communications 21(4), December 2008.
% The editor field is not printed by the standard article styles; it is kept as metadata.
@article{jaeschke2008tag,
  abstract  = {Collaborative tagging systems allow users to assign keywords - so called "tags" - to resources. Tags are used for navigation, finding resources and serendipitous browsing and thus provide an immediate benefit for users. These systems usually include tag recommendation mechanisms easing the process of finding good tags for a resource, but also consolidating the tag vocabulary across users. In practice, however, only very basic recommendation strategies are applied. In this paper we evaluate and compare several recommendation algorithms on large-scale real life datasets: an adaptation of user-based collaborative filtering, a graph-based recommender built on top of the FolkRank algorithm, and simple methods based on counting tag occurrences. We show that both FolkRank and Collaborative Filtering provide better results than non-personalized baseline methods. Moreover, since methods based on counting tag occurrences are computationally cheap, and thus usually preferable for real time scenarios, we discuss simple approaches for improving the performance of such methods. We show, how a simple recommender based on counting tags from users and resources can perform almost as good as the best recommender.},
  address   = {Amsterdam},
  author    = {J{\"a}schke, Robert and Marinho, Leandro and Hotho, Andreas and Schmidt-Thieme, Lars and Stumme, Gerd},
  doi       = {10.3233/AIC-2008-0438},
  editor    = {Giunchiglia, Enrico},
  interhash = {b2f1aba6829affc85d852ea93a8e39f7},
  intrahash = {955bcf14f3272ba6eaf3dadbef6c0b10},
  issn      = {0921-7126},
  journal   = {AI Communications},
  month     = dec,
  number    = 4,
  pages     = {231--247},
  publisher = {IOS Press},
  title     = {Tag Recommendations in Social Bookmarking Systems},
  url       = {http://www.kde.cs.uni-kassel.de/pub/pdf/jaeschke2008tag.pdf},
  vgwort    = {63},
  volume    = 21,
  year      = 2008
}

% Journal article: Web Semantics 6(1), February 2008.
% booktitle and editor are not printed by the standard article styles; they are
% kept as metadata (presumably the special-issue title and its editors - confirm).
@article{jaeschke2008discovering,
  abstract  = {Social bookmarking tools are rapidly emerging on the Web. In such systems users are setting up lightweight conceptual structures called folksonomies. Unlike ontologies, shared conceptualizations are not formalized, but rather implicit. We present a new data mining task, the mining of all frequent tri-concepts, together with an efficient algorithm, for discovering these implicit shared conceptualizations. Our approach extends the data mining task of discovering all closed itemsets to three-dimensional data structures to allow for mining folksonomies. We provide a formal definition of the problem, and present an efficient algorithm for its solution. Finally, we show the applicability of our approach on three large real-world examples.},
  address   = {New York},
  author    = {J{\"a}schke, Robert and Hotho, Andreas and Schmitz, Christoph and Ganter, Bernhard and Stumme, Gerd},
  booktitle = {Semantic Web and Web 2.0},
  doi       = {10.1016/j.websem.2007.11.004},
  editor    = {Finin, T. and Mizoguchi, R. and Staab, S.},
  interhash = {cfca594f9dbe30694bfbcdeb40dc4e88},
  intrahash = {18e8babe208fae2c0342438617b0ec31},
  issn      = {1570-8268},
  journal   = {Web Semantics: Science, Services and Agents on the World Wide Web},
  month     = feb,
  number    = 1,
  pages     = {38--53},
  publisher = {Elsevier},
  title     = {Discovering Shared Conceptualizations in Folksonomies},
  url       = {http://www.kde.cs.uni-kassel.de/pub/pdf/jaeschke2008discovering.pdf},
  vgwort    = {59},
  volume    = 6,
  year      = 2008
}

% Journal article: The VLDB Journal 19(6), December 2010.
% The system name in the title is brace-protected so styles do not lowercase it.
@article{benz2010social,
  abstract  = {Social resource sharing systems are central elements of the Web 2.0 and use the same kind of lightweight knowledge representation, called folksonomy. Their large user communities and ever-growing networks of user-generated content have made them an attractive object of investigation for researchers from different disciplines like Social Network Analysis, Data Mining, Information Retrieval or Knowledge Discovery. In this paper, we summarize and extend our work on different aspects of this branch of Web 2.0 research, demonstrated and evaluated within our own social bookmark and publication sharing system BibSonomy, which is currently among the three most popular systems of its kind. We structure this presentation along the different interaction phases of a user with our system, coupling the relevant research questions of each phase with the corresponding implementation issues. This approach reveals in a systematic fashion important aspects and results of the broad bandwidth of folksonomy research like capturing of emergent semantics, spam detection, ranking algorithms, analogies to search engine log data, personalized tag recommendations and information extraction techniques. We conclude that when integrating a real-life application like BibSonomy into research, certain constraints have to be considered; but in general, the tight interplay between our scientific work and the running system has made BibSonomy a valuable platform for demonstrating and evaluating Web 2.0 research.},
  address   = {Berlin/Heidelberg},
  author    = {Benz, Dominik and Hotho, Andreas and J{\"a}schke, Robert and Krause, Beate and Mitzlaff, Folke and Schmitz, Christoph and Stumme, Gerd},
  doi       = {10.1007/s00778-010-0208-4},
  interhash = {57fe43734b18909a24bf5bf6608d2a09},
  intrahash = {c9437d5ec56ba949f533aeec00f571e3},
  issn      = {1066-8888},
  journal   = {The VLDB Journal},
  month     = dec,
  number    = 6,
  pages     = {849--875},
  publisher = {Springer},
  title     = {The Social Bookmark and Publication Management System {BibSonomy}},
  url       = {http://www.kde.cs.uni-kassel.de/pub/pdf/benz2010social.pdf},
  volume    = 19,
  year      = 2010
}

% Journal article: Knowledge and Information Systems 14(1), January 2008.
% The doi value is the bare identifier taken from the dx.doi.org resolver URL in the url field.
@article{wu2008wu,
  abstract  = {This paper presents the top 10 data mining algorithms identified by the IEEE International Conference on Data Mining (ICDM) in December 2006: C4.5, k-Means, SVM, Apriori, EM, PageRank, AdaBoost, kNN, Naive Bayes, and CART. These top 10 algorithms are among the most influential data mining algorithms in the research community. With each algorithm, we provide a description of the algorithm, discuss the impact of the algorithm, and review current and further research on the algorithm. These 10 algorithms cover classification, clustering, statistical learning, association analysis, and link mining, which are all among the most important topics in data mining research and development.},
  address   = {London},
  author    = {Wu, Xindong and Kumar, Vipin and Quinlan, J. Ross and Ghosh, Joydeep and Yang, Qiang and Motoda, Hiroshi and McLachlan, Geoffrey and Ng, Angus and Liu, Bing and Yu, Philip and Zhou, Zhi-Hua and Steinbach, Michael and Hand, David and Steinberg, Dan},
  doi       = {10.1007/s10115-007-0114-2},
  interhash = {76fd294a34cf85638f6e194a85af8db9},
  intrahash = {2c34bb4b49187a6d3e780e78d254ae1f},
  issn      = {0219-1377},
  journal   = {Knowledge and Information Systems},
  month     = jan,
  number    = 1,
  pages     = {1--37},
  publisher = {Springer},
  title     = {Top 10 algorithms in data mining},
  url       = {http://dx.doi.org/10.1007/s10115-007-0114-2},
  volume    = 14,
  year      = 2008
}