@comment{
  Publications database (2008 entries), one field per line.
  Conventions: bare DOIs without resolver prefix, page ranges with "--",
  LaTeX specials such as "&" escaped in text fields (but not in url),
  acronyms in titles protected with braces.
  The interhash/intrahash fields are kept verbatim from the original export.
}

@inproceedings{cattuto2008semantic,
  abstract  = {Collaborative tagging systems have nowadays become important data sources for populating semantic web applications. For tasks like synonym detection and discovery of concept hierarchies, many researchers introduced measures of tag similarity. Even though most of these measures appear very natural, their design often seems to be rather ad hoc, and the underlying assumptions on the notion of similarity are not made explicit. A more systematic characterization and validation of tag similarity in terms of formal representations of knowledge is still lacking. Here we address this issue and analyze several measures of tag similarity: Each measure is computed on data from the social bookmarking system del.icio.us and a semantic grounding is provided by mapping pairs of similar tags in the folksonomy to pairs of synsets in Wordnet, where we use validated measures of semantic distance to characterize the semantic relation between the mapped tags. This exposes important features of the investigated similarity measures and indicates which ones are better suited in the context of a given semantic application.},
  author    = {Cattuto, Ciro and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  booktitle = {The Semantic Web - ISWC 2008},
  doi       = {10.1007/978-3-540-88564-1_39},
  interhash = {b44538648cfd476d6c94e30bc6626c86},
  intrahash = {4752f261d03cead0c52565148a0ba1c9},
  isbn      = {978-3-540-88563-4},
  pages     = {615--631},
  publisher = {Springer Berlin / Heidelberg},
  series    = {Lecture Notes in Computer Science},
  title     = {Semantic Grounding of Tag Relatedness in Social Bookmarking Systems},
  url       = {http://www.kde.cs.uni-kassel.de/pub/pdf/cattuto2008semantica.pdf},
  volume    = 5318,
  year      = 2008
}

@inproceedings{1379123,
  abstract  = {Social bookmarking systems constitute an established part of the Web 2.0. In such systems users describe bookmarks by keywords called tags. The structure behind these social systems, called folksonomies, can be viewed as a tripartite hypergraph of user, tag and resource nodes. This underlying network shows specific structural properties that explain its growth and the possibility of serendipitous exploration. Today's search engines represent the gateway to retrieve information from the World Wide Web. Short queries typically consisting of two to three words describe a user's information need. In response to the displayed results of the search engine, users click on the links of the result page as they expect the answer to be of relevance. This clickdata can be represented as a folksonomy in which queries are descriptions of clicked URLs. The resulting network structure, which we will term logsonomy is very similar to the one of folksonomies. In order to find out about its properties, we analyze the topological characteristics of the tripartite hypergraph of queries, users and bookmarks on a large snapshot of del.icio.us and on query logs of two large search engines. All of the three datasets show small world properties. The tagging behavior of users, which is explained by preferential attachment of the tags in social bookmark systems, is reflected in the distribution of single query words in search engines. We can conclude that the clicking behaviour of search engine users based on the displayed search results and the tagging behaviour of social bookmarking users is driven by similar dynamics.},
  address   = {New York, NY, USA},
  author    = {Krause, Beate and Jäschke, Robert and Hotho, Andreas and Stumme, Gerd},
  booktitle = {HT '08: Proceedings of the nineteenth ACM conference on Hypertext and hypermedia},
  doi       = {10.1145/1379092.1379123},
  interhash = {6d34ea1823d95b9dbf37d4db4d125d2a},
  intrahash = {c7f43f2f922de1e7febedd10347e80cb},
  isbn      = {978-1-59593-985-2},
  location  = {Pittsburgh, PA, USA},
  pages     = {157--166},
  publisher = {ACM},
  title     = {Logsonomy - social information retrieval with logdata},
  url       = {http://portal.acm.org/citation.cfm?id=1379092.1379123&coll=ACM&dl=ACM&type=series&idx=SERIES399&part=series&WantType=Proceedings&title=HT&CFID=825963&CFTOKEN=78379687},
  year      = 2008
}

@inproceedings{krause2008comparison,
  abstract  = {Social bookmarking systems allow users to store links to internet resources on a web page. As social bookmarking systems are growing in popularity, search algorithms have been developed that transfer the idea of link-based rankings in the Web to a social bookmarking system’s data structure. These rankings differ from traditional search engine rankings in that they incorporate the rating of users. In this study, we compare search in social bookmarking systems with traditional Web search. In the first part, we compare the user activity and behaviour in both kinds of systems, as well as the overlap of the underlying sets of URLs. In the second part, we compare graph-based and vector space rankings for social bookmarking systems with commercial search engine rankings. Our experiments are performed on data of the social bookmarking system Del.icio.us and on rankings and log data from Google, MSN, and AOL. We will show that part of the difference between the systems is due to different behaviour (e. g., the concatenation of multi-word lexems to single terms in Del.icio.us), and that real-world events may trigger similar behaviour in both kinds of systems. We will also show that a graph-based ranking approach on folksonomies yields results that are closer to the rankings of the commercial search engines than vector space retrieval, and that the correlation is high in particular for the domains that are well covered by the social bookmarking system.},
  author    = {Krause, Beate and Hotho, Andreas and Stumme, Gerd},
  booktitle = {Advances in Information Retrieval, 30th European Conference on IR Research, ECIR 2008},
  interhash = {37598733b747093d97a0840a11beebf5},
  intrahash = {613f5c41ff759fc548c9085102d1c933},
  pages     = {101--113},
  publisher = {Springer},
  title     = {A Comparison of Social Bookmarking with Traditional Search},
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/2008/ecir2008krause.pdf},
  volume    = 4956,
  year      = 2008
}

@inproceedings{benz2008analyzing,
  abstract  = {The objective of our group was to exploit state-of-the-art Information Retrieval methods for finding associations and dependencies between tags, capturing and representing differences in tagging behavior and vocabulary of various folksonomies, with the overall aim to better understand the semantics of tags and the tagging process. Therefore we analyze the semantic content of tags in the Flickr and Delicious folksonomies. We find that: tag context similarity leads to meaningful results in Flickr, despite its narrow folksonomy character; the comparison of tags across Flickr and Delicious shows little semantic overlap, being tags in Flickr associated more to visual aspects rather than technological as it seems to be in Delicious; there are regions in the tag-tag space, provided with the cosine similarity metric, that are characterized by high density; the order of tags inside a post has a semantic relevance.},
  address   = {Dagstuhl, Germany},
  author    = {Benz, Dominik and Grobelnik, Marko and Hotho, Andreas and Jäschke, Robert and Mladenic, Dunja and Servedio, Vito D. P. and Sizov, Sergej and Szomszor, Martin},
  booktitle = {Social Web Communities},
  editor    = {Alani, Harith and Staab, Steffen and Stumme, Gerd},
  interhash = {d738d9d90c1c466ee0a73ac0cc3dc4c1},
  intrahash = {7ab57438aa5a68137e46dab8dadd4b2c},
  issn      = {1862-4405},
  number    = {08391},
  publisher = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik},
  series    = {Dagstuhl Seminar Proceedings},
  title     = {Analyzing Tag Semantics Across Collaborative Tagging Systems},
  url       = {http://drops.dagstuhl.de/opus/volltexte/2008/1785},
  year      = 2008
}

@inproceedings{anti2008krause,
  address   = {New York, NY, USA},
  author    = {Krause, Beate and Schmitz, Christoph and Hotho, Andreas and Stumme, Gerd},
  booktitle = {AIRWeb '08: Proceedings of the 4th international workshop on Adversarial information retrieval on the web},
  doi       = {10.1145/1451983.1451998},
  interhash = {a45d40ac7776551301ad9dde5b25357f},
  intrahash = {68effe5d4b9460f9388e7685310f74c2},
  isbn      = {978-1-60558-159-0},
  location  = {Beijing, China},
  pages     = {61--68},
  publisher = {ACM},
  title     = {The Anti-Social Tagger - Detecting Spam in Social Bookmarking Systems},
  url       = {http://airweb.cse.lehigh.edu/2008/submissions/krause_2008_anti_social_tagger.pdf},
  year      = 2008
}

@incollection{1420085867,
  asin      = {1420085867},
  author    = {May, Michael and Berendt, Bettina and Cornuéjols, Antoine and Gama, João and Giannotti, Fosca and Hotho, Andreas and Malerba, Donato and Menesalvas, Ernestina and Morik, Katharina and Pedersen, Rasmus and Saitta, Lorenza and Saygin, Yücel and Schuster, Assaf and Vanhoof, Koen},
  booktitle = {Next Generation of Data Mining},
  dewey     = {005.74},
  ean       = {9781420085860},
  edition   = 1,
  interhash = {7aeb3b998b5918d86093e05601e81b4d},
  intrahash = {be3c753af98ab591b4f31d349513b461},
  isbn      = {1420085867},
  isbn13    = {9781420085860},
  publisher = {Chapman \& Hall/CRC},
  series    = {Chapman \& Hall/CRC Data Mining and Knowledge Discovery Series},
  title     = {Research Challenges in Ubiquitous Knowledge Discovery},
  url       = {http://208.254.79.11/shopping_cart/products/product_contents.asp?id=&parent_id=497&sku=C5867&isbn=9781420085860&pc=},
  year      = 2008
}

@article{jaeschke2008tag,
  abstract  = {Collaborative tagging systems allow users to assign keywords - so called "tags" - to resources. Tags are used for navigation, finding resources and serendipitous browsing and thus provide an immediate benefit for users. These systems usually include tag recommendation mechanisms easing the process of finding good tags for a resource, but also consolidating the tag vocabulary across users. In practice, however, only very basic recommendation strategies are applied. In this paper we evaluate and compare several recommendation algorithms on large-scale real life datasets: an adaptation of user-based collaborative filtering, a graph-based recommender built on top of the FolkRank algorithm, and simple methods based on counting tag occurrences. We show that both FolkRank and Collaborative Filtering provide better results than non-personalized baseline methods. Moreover, since methods based on counting tag occurrences are computationally cheap, and thus usually preferable for real time scenarios, we discuss simple approaches for improving the performance of such methods. We show, how a simple recommender based on counting tags from users and resources can perform almost as good as the best recommender.},
  address   = {Amsterdam},
  author    = {Jäschke, Robert and Marinho, Leandro and Hotho, Andreas and Schmidt-Thieme, Lars and Stumme, Gerd},
  doi       = {10.3233/AIC-2008-0438},
  editor    = {Giunchiglia, Enrico},
  interhash = {b2f1aba6829affc85d852ea93a8e39f7},
  intrahash = {955bcf14f3272ba6eaf3dadbef6c0b10},
  issn      = {0921-7126},
  journal   = {AI Communications},
  number    = 4,
  pages     = {231--247},
  publisher = {IOS Press},
  title     = {Tag Recommendations in Social Bookmarking Systems},
  url       = {http://dx.doi.org/10.3233/AIC-2008-0438},
  vgwort    = {63},
  volume    = 21,
  year      = 2008
}

@article{voelker2008aeon,
  abstract  = {OntoClean is an approach towards the formal evaluation of taxonomic relations in ontologies. The application of OntoClean consists of two main steps. First, concepts are tagged according to meta-properties known as rigidity, unity, dependency and identity. Second, the tagged concepts are checked according to predefined constraints to discover taxonomic errors. Although OntoClean is well documented in numerous publications, it is still used rather infrequently due to the high costs of application. Especially, the manual tagging of concepts with the correct meta-properties requires substantial efforts of highly experienced ontology engineers. In order to facilitate the use of OntoClean and to enable the evaluation of real-world ontologies, we provide AEON, a tool which automatically tags concepts with appropriate OntoClean meta-properties and performs the constraint checking. We use the Web as an embodiment of world knowledge, where we search for patterns that indicate how to properly tag concepts. We thoroughly evaluated our approach against a manually created gold standard. The evaluation shows the competitiveness of our approach while at the same time significantly lowering the costs. All of our results, i.e. the tool AEON as well as the experiment data, are publicly available.},
  address   = {Amsterdam, The Netherlands},
  author    = {Völker, Johanna and Vrandečić, Denny and Sure, York and Hotho, Andreas},
  interhash = {f14794f4961d0127dc50c1938eaef7ea},
  intrahash = {f8f0bb3e3495e7627770b470d1a5f1a3},
  issn      = {1570-5838},
  journal   = {Applied Ontology},
  number    = {1-2},
  pages     = {41--62},
  publisher = {IOS Press},
  title     = {{AEON} - An approach to the automatic evaluation of ontologies},
  url       = {http://portal.acm.org/citation.cfm?id=1412422},
  volume    = 3,
  year      = 2008
}

@book{berendt2008challenge,
  editor    = {Berendt, Bettina and Glance, Natalie and Hotho, Andreas},
  interhash = {ab9a3d6f27b2bc93cb8137a0f7988271},
  intrahash = {50f6cd75018d7ee98f49c5ab4ef0faaf},
  publisher = {Workshop at 18th Europ. Conf. on Machine Learning (ECML'08) / 11th Europ. Conf. on Principles and Practice of Knowledge Discovery in Databases (PKDD'08)},
  title     = {Wikis, Blogs, Bookmarking Tools - Mining the {Web} 2.0 Workshop},
  url       = {http://www.kde.cs.uni-kassel.de/ws/wbbtmine2008/pdf/all_wbbtmine2008.pdf},
  year      = 2008
}

@book{hotho2008challenge,
  editor    = {Hotho, Andreas and Benz, Dominik and Jäschke, Robert and Krause, Beate},
  interhash = {fbcdc431904808bb868f09734b91af87},
  intrahash = {1d5d5ef0bb222cb2f3adef4d6b06f1ea},
  publisher = {Workshop at 18th Europ. Conf. on Machine Learning (ECML'08) / 11th Europ. Conf. on Principles and Practice of Knowledge Discovery in Databases (PKDD'08)},
  title     = {{ECML} {PKDD} Discovery Challenge 2008 ({RSDC}'08)},
  url       = {http://www.kde.cs.uni-kassel.de/ws/rsdc08/pdf/all_rsdc_v2.pdf},
  year      = 2008
}

@incollection{hotho2008bookmarking,
  address   = {München},
  asin      = {3486585797},
  author    = {Hotho, Andreas},
  booktitle = {Web 2.0 in der Unternehmenspraxis: Grundlagen, Fallstudien und Trends zum Einsatz von Social Software},
  ean       = {9783486585797},
  editor    = {Back, Andrea and Gronau, Norbert and Tochtermann, Klaus},
  interhash = {1418948ca884cd3456a95b30e366ee8f},
  intrahash = {b54f6557893e3ab9d1eb83b0baeb136e},
  isbn      = {9783486585797},
  pages     = {26--38},
  publisher = {Oldenbourg Verlag},
  title     = {Social Bookmarking},
  url       = {http://www.amazon.de/gp/redirect.html%3FASIN=3486585797%26tag=ws%26lcode=xm2%26cID=2025%26ccmID=165953%26location=/Web-2-0-Unternehmenspraxis-Grundlagen-Fallstudien/dp/3486585797%253FSubscriptionId=13CT5CVB80YFWJEPWS02},
  year      = 2008
}

@article{jaeschke2008discovering,
  abstract  = {Social bookmarking tools are rapidly emerging on the Web. In such systems users are setting up lightweight conceptual structures called folksonomies. Unlike ontologies, shared conceptualizations are not formalized, but rather implicit. We present a new data mining task, the mining of all frequent tri-concepts, together with an efficient algorithm, for discovering these implicit shared conceptualizations. Our approach extends the data mining task of discovering all closed itemsets to three-dimensional data structures to allow for mining folksonomies. We provide a formal definition of the problem, and present an efficient algorithm for its solution. Finally, we show the applicability of our approach on three large real-world examples.},
  author    = {Jäschke, Robert and Hotho, Andreas and Schmitz, Christoph and Ganter, Bernhard and Stumme, Gerd},
  booktitle = {Semantic Web and Web 2.0},
  interhash = {cfca594f9dbe30694bfbcdeb40dc4e88},
  intrahash = {63901930c137df0c2dad84075c564b14},
  journal   = {Web Semantics: Science, Services and Agents on the World Wide Web},
  month     = feb,
  number    = 1,
  pages     = {38--53},
  title     = {Discovering Shared Conceptualizations in Folksonomies},
  url       = {http://www.sciencedirect.com/science/article/B758F-4R53WD4-1/2/ae56bd6e7132074272ca2035be13781b},
  volume    = 6,
  year      = 2008
}

@misc{cattuto-2008,
  abstract      = {Social bookmarking systems allow users to organise collections of resources on the Web in a collaborative fashion. The increasing popularity of these systems as well as first insights into their emergent semantics have made them relevant to disciplines like knowledge extraction and ontology learning. The problem of devising methods to measure the semantic relatedness between tags and characterizing it semantically is still largely open. Here we analyze three measures of tag relatedness: tag co-occurrence, cosine similarity of co-occurrence distributions, and FolkRank, an adaptation of the PageRank algorithm to folksonomies. Each measure is computed on tags from a large-scale dataset crawled from the social bookmarking system del.icio.us. To provide a semantic grounding of our findings, a connection to WordNet (a semantic lexicon for the English language) is established by mapping tags into synonym sets of WordNet, and applying there well-known metrics of semantic similarity. Our results clearly expose different characteristics of the selected measures of relatedness, making them applicable to different subtasks of knowledge extraction such as synonym detection or discovery of concept hierarchies.},
  archiveprefix = {arXiv},
  author        = {Cattuto, Ciro and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  eprint        = {0805.2045},
  interhash     = {cc62b733f6e0402db966d6dbf1b7711f},
  intrahash     = {78fd64c3db55e6387ebdeb6c40054542},
  title         = {Semantic Analysis of Tag Similarity Measures in Collaborative Tagging Systems},
  url           = {http://www.citebase.org/abstract?id=oai:arXiv.org:0805.2045},
  year          = 2008
}

@inproceedings{Jaeschke2008logsonomy,
  abstract  = {In social bookmarking systems users describe bookmarks by keywords called tags. The structure behind these social systems, called folksonomies, can be viewed as a tripartite hypergraph of user, tag and resource nodes. This underlying network shows specific structural properties that explain its growth and the possibility of serendipitous exploration. Search engines filter the vast information of the web. Queries describe a user’s information need. In response to the displayed results of the search engine, users click on the links of the result page as they expect the answer to be of relevance. The clickdata can be represented as a folksonomy in which queries are descriptions of clicked URLs. This poster analyzes the topological characteristics of the resulting tripartite hypergraph of queries, users and bookmarks of two query logs and compares it to a snapshot of the folksonomy del.icio.us.},
  author    = {Jäschke, Robert and Krause, Beate and Hotho, Andreas and Stumme, Gerd},
  booktitle = {Proceedings of the Second International Conference on Weblogs and Social Media (ICWSM 2008)},
  interhash = {13ec3f45fc7e0364cdc6b9a7c12c5c2c},
  intrahash = {359e1eccdc524334d4a2ad51330f76ae},
  publisher = {AAAI Press},
  title     = {Logsonomy — A Search Engine Folksonomy},
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/2008/Krause2008logsonomy_short.pdf},
  year      = 2008
}