% Bibliography on ontology learning, folksonomies and tag semantics.
% Layout: one field per line; fields kept in the exporter's alphabetical order.
% Non-standard fields (interhash, intrahash, username, groups, file, timestamp,
% ee, bibsource, ...) are ignored by standard styles and are kept untouched.
% Several entries carry a crossref whose parent entry is not defined in this
% part of the file -- NOTE(review): confirm the parents exist further down.

@article{strohmaier2011evaluation,
  author    = {Strohmaier, Markus and Helic, Denis and Benz, Dominik and Körner, Christian and Kern, Roman},
  interhash = {87e110b0ade230877db6855cacabcb4d},
  intrahash = {603161eb4c5b2f87f3d3a50f87015337},
  journal   = {ACM Transactions on Intelligent Systems and Technology},
  title     = {Evaluation of Folksonomy Induction Algorithms},
  url       = {http://tist.acm.org/index.html},
  vgwort    = {43},
  year      = 2012
}

@inproceedings{benz2007position,
  abstract  = {The emergence of collaborative tagging systems with their underlying flat and uncontrolled resource organization paradigm has led to a large number of research activities focussing on a formal description and analysis of the resulting “folksonomies”. An interesting outcome is that the characteristic qualities of these systems seem to be inverse to more traditional knowledge structuring approaches like taxonomies or ontologies: The latter provide rich and precise semantics, but suffer - amongst others - from a knowledge acquisition bottleneck. An important step towards exploiting the possible synergies by bridging the gap between both paradigms is the automatic extraction of relations between tags in a folksonomy. This position paper presents preliminary results of ongoing work to induce hierarchical relationships among tags by analyzing the aggregated data of collaborative tagging systems as a basis for an ontology learning procedure.},
  author    = {Benz, Dominik and Hotho, Andreas},
  booktitle = {Workshop Proceedings of Lernen - Wissensentdeckung - Adaptivität (LWA 2007)},
  editor    = {Hinneburg, Alexander},
  file      = {benz2007position.pdf:benz2007position.pdf:PDF},
  groups    = {public},
  interhash = {ff7de5717f771dabd764675279ff3adf},
  intrahash = {72bff5ebe5dfb5023f62ba9b94e6ed01},
  isbn      = {978-3-86010-907-6},
  month     = sep,
  note      = {http://lwa07.informatik.uni-halle.de/kdml07/kdml07.htm},
  pages     = {109--112},
  publisher = {Martin-Luther-Universität Halle-Wittenberg},
  title     = {Position Paper: Ontology Learning from Folksonomies},
  url       = {http://www.kde.cs.uni-kassel.de/pub/pdf/benz2007position.pdf},
  username  = {dbenz},
  year      = 2007
}

% A second, byte-identical copy of strohmaier2011evaluation used to follow
% here; it was dropped because BibTeX rejects repeated citation keys.

@incollection{cimiano2009ontology,
  affiliation = {University of Karlsruhe Institute AIFB Karlsruhe Germany},
  author      = {Cimiano, Philipp and M{\"{a}}dche, Alexander and Staab, Steffen and V{\"{o}}lker, Johanna},
  booktitle   = {Handbook on Ontologies},
  editor      = {Staab, Steffen and Studer, Rudi},
  interhash   = {884c5b59450bf7982a4345f523181404},
  intrahash   = {3081beee709710cd12ca402a00526ef2},
  isbn        = {978-3-540-92673-3},
  keyword     = {Economics/Management Science},
  pages       = {245--267},
  publisher   = {Springer Berlin Heidelberg},
  series      = {International Handbooks on Information Systems},
  title       = {Ontology Learning},
  url         = {http://dx.doi.org/10.1007/978-3-540-92673-3_11},
  year        = 2009
}

@article{studer1998knowledge,
  abstract             = {This paper gives an overview about the development of the field of Knowledge Engineering over the last 15 years. We discuss the paradigm shift from a transfer view to a modeling view and describe two approaches which considerably shaped research in Knowledge Engineering: Role-limiting Methods and Generic Tasks. To illustrate various concepts and methods which evolved in the last years we describe three modeling frameworks: CommonKADS, MIKE, and PROTÉGÉ-II. This description is supplemented by discussing some important methodological developments in more detail: specification languages for knowledge-based systems, problem-solving methods, and ontologies. We conclude with outlining the relationship of Knowledge Engineering to Software Engineering, Information Integration and Knowledge Management.},
  author               = {Studer, Rudi and Benjamins, Richard R. and Fensel, Dieter},
  citeseerurl          = {studer98knowledge.html},
  citeulike-article-id = {121525},
  comment              = {ontologies capture static knowledge},
  interhash            = {68b8b754b1eb74a6c5d9313933b61b6a},
  intrahash            = {5f5f2584d7313b47172a3eab121d0069},
  journal              = {Data \& Knowledge Engineering},
  number               = {1--2},
  pages                = {161--197},
  priority             = {0},
  title                = {Knowledge {Engineering}: {Principles} and {Methods}},
  url                  = {http://www.cs.toronto.edu/~nernst/papers/studer98knowledge.pdf},
  volume               = 25,
  year                 = 1998
}

@book{cimiano2006ontology,
  author    = {Cimiano, Philipp},
  ee        = {http://dx.doi.org/10.1007/978-0-387-39252-3},
  interhash = {f8a70c22cfd162dc9ad2cd977d79b66c},
  intrahash = {09ab696de72e68b0b2aaf21ae3b0b613},
  isbn      = {978-0-387-30632-2},
  pages     = {I--XXVIII, 1--347},
  publisher = {Springer},
  title     = {Ontology Learning and Population from Text: Algorithms, Evaluation and Applications},
  year      = 2006
}

@unpublished{weichselbraun2011ontology,
  author    = {Weichselbraun, Albert},
  file      = {weichselbraun2011ontology.pdf:weichselbraun2011ontology.pdf:PDF},
  groups    = {public},
  interhash = {c3af6c9fe13d263f0d277c40bf2471cc},
  intrahash = {9f6febd6e835d24edb2547ad39bf36f4},
  note      = {Presentation Slides only},
  timestamp = {2011.07.29},
  title     = {Ontology Learning based on Text Mining and Social Evidence Sources},
  url       = {http://eprints.weblyzard.com/27/},
  username  = {dbenz},
  year      = 2011
}

@mastersthesis{keller2010theoretical,
  author    = {Keller, Christine},
  file      = {keller2010theoretical.pdf:keller2010theoretical.pdf:PDF},
  groups    = {public},
  interhash = {1b3c1123ec9de6b4997ca24cb9e658fd},
  intrahash = {5e7a5d5d2ff00af9a914b3a547ca3c48},
  school    = {Universität Stuttgart},
  timestamp = {2011.07.29},
  title     = {Theoretical and Practical Perspectives on Ontology Learning from Folksonomies},
  username  = {dbenz},
  year      = 2010
}

@mastersthesis{meder2010multidomain,
  author    = {Meder, Michael},
  groups    = {public},
  interhash = {c344c636c94156ba014c020d9e16b1e5},
  intrahash = {7ef2f23103d4c0ed0ad344f9ead8db9d},
  school    = {Technische Universität Berlin},
  timestamp = {2011.07.20},
  title     = {{Multi-Domain} {Klassifikation} basierend auf nutzergenerierten {Metadaten}},
  username  = {dbenz},
  year      = 2010
}

@inproceedings{widdows2002graph,
  author    = {Widdows, Dominic and Dorow, Beate},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  booktitle = {COLING},
  ee        = {http://acl.ldc.upenn.edu/C/C02/C02-1114.pdf},
  interhash = {778db99ef80f4b5a682eb6923cc0eb13},
  intrahash = {a16325d6196b3adb8e68851f4f4eff84},
  title     = {A Graph Model for Unsupervised Lexical Acquisition},
  year      = 2002
}

@inproceedings{angeletou2008semantically,
  abstract    = {While the increasing popularity of folksonomies has lead to a vast quantity of tagged data, resource retrieval in folksonomies is limited by being agnostic to the meaning (i.e., semantics) of tags. Our goal is to automatically enrich folksonomy tags (and implicitly the related resources) with formal semantics by associating them to relevant concepts defined in online ontologies. We introduce FLOR, a method that performs automatic folksonomy enrichment by combining knowledge from WordNet and online available ontologies. Experimentally testing FLOR, we found that it correctly enriched 72\% of 250 Flickr photos.},
  author      = {Angeletou, Sofia and Sabou, Marta and Motta, Enrico},
  booktitle   = {Proceedings of the CISWeb Workshop, located at the 5th European Semantic Web Conference ESWC 2008},
  file        = {angeletou2008semantically.pdf:angeletou2008semantically.pdf:PDF},
  groups      = {public},
  institution = {CiteSeerX - Scientific Literature Digital Library and Search Engine [http://citeseerx.ist.psu.edu/oai2] (United States)},
  interhash   = {1b244d0220730e994822192f6e1cba76},
  intrahash   = {e6404fa071680b21905ef7f3255359f7},
  location    = {http://www.scientificcommons.org/47680629},
  timestamp   = {2011-02-17 10:55:55},
  title       = {Semantically enriching folksonomies with {FLOR}},
  url         = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.141.2569},
  username    = {dbenz},
  year        = 2008
}

@inproceedings{garcia2009preliminary,
  abstract  = {The availability of tag-based user-generated content for a variety of Web resources (music, photos, videos, text, etc.) has largely increased in the last years. Users can assign tags freely and then use them to share and retrieve information. However, tag-based sharing and retrieval is not optimal due to the fact that tags are plain text labels without an explicit or formal meaning, and hence polysemy and synonymy should be dealt with appropriately. To ameliorate these problems, we propose a context-based tag disambiguation algorithm that selects the meaning of a tag among a set of candidate DBpedia entries, using a common information retrieval similarity measure. The most similar DBpedia entry is selected as the one representing the meaning of the tag. We describe and analyze some preliminary results, and discuss about current challenges in this area.},
  author    = {Garcia, Andres and Szomszor, Martin and Alani, Harith and Corcho, Oscar},
  booktitle = {Knowledge Capture (K-Cap'09) - First International Workshop on Collective Knowledge Capturing and Representation - CKCaR'09},
  file      = {garcia2009preliminary.pdf:garcia2009preliminary.pdf:PDF},
  groups    = {public},
  interhash = {5da3fa037c8f1bc0b4a6255a46e08077},
  intrahash = {dfe0fee496a65763bcfae4070ffcf47e},
  month     = sep,
  timestamp = {2011-02-17 10:59:45},
  title     = {Preliminary Results in Tag Disambiguation using {DBpedia}},
  url       = {http://eprints.ecs.soton.ac.uk/17792/},
  username  = {dbenz},
  year      = 2009
}

% NOTE(review): booktitle below used to be the unresolved export placeholder
% "#iswc2010#" (raw hash signs break LaTeX). It now holds the proceedings
% title inferred from series, volume, ISBN and URL -- confirm against the
% crossref parent if that entry exists elsewhere in the file.
@inproceedings{ireson2010toponym,
  abstract  = {Increasingly user-generated content is being utilised as a source of information, however each individual piece of content tends to contain low levels of information. In addition, such information tends to be informal and imperfect in nature; containing imprecise, subjective, ambiguous expressions. However the content does not have to be interpreted in isolation as it is linked, either explicitly or implicitly, to a network of interrelated content; it may be grouped or tagged with similar content, comments may be added by other users or it may be related to other content posted at the same time or by the same author or members of the author's social network. This paper generally examines how ambiguous concepts within user-generated content can be assigned a specific/formal meaning by considering the expanding context of the information, i.e. other information contained within directly or indirectly related content, and specifically considers the issue of toponym resolution of locations.},
  author    = {Ireson, Neil and Ciravegna, Fabio},
  booktitle = {The Semantic Web -- {ISWC} 2010},
  crossref  = {conf/semweb/2010-1},
  editor    = {Patel-Schneider, Peter F. and Pan, Yue and Hitzler, Pascal and Mika, Peter and Zhang, Lei and Pan, Jeff Z. and Horrocks, Ian and Glimm, Birte},
  ee        = {http://dx.doi.org/10.1007/978-3-642-17746-0_24},
  file      = {ireson2010toponym.pdf:ireson2010toponym.pdf:PDF},
  groups    = {public},
  interhash = {fd064c5fb724a5a72a6a67d1f6a7f8df},
  intrahash = {1b0c968b68745971cef000eb3644ba3a},
  isbn      = {978-3-642-17745-3},
  pages     = {370--385},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  timestamp = {2011-02-02 15:00:36},
  title     = {Toponym Resolution in Social Media},
  url       = {http://dblp.uni-trier.de/db/conf/semweb/iswc2010-1.html#IresonC10},
  username  = {dbenz},
  volume    = 6496,
  year      = 2010
}

@inproceedings{kennedy2007how,
  abstract             = {The advent of media-sharing sites like Flickr and YouTube has drastically increased the volume of community-contributed multimedia resources available on the web. These collections have a previously unimagined depth and breadth, and have generated new opportunities – and new challenges – to multimedia research. How do we analyze, understand and extract patterns from these new collections? How can we use these unstructured, unrestricted community contributions of media (and annotation) to generate “knowledge”? As a test case, we study Flickr – a popular photo sharing website. Flickr supports photo, time and location metadata, as well as a light-weight annotation model. We extract information from this dataset using two different approaches. First, we employ a location-driven approach to generate aggregate knowledge in the form of “representative tags” for arbitrary areas in the world. Second, we use a tag-driven approach to automatically extract place and event semantics for Flickr tags, based on each tag’s metadata patterns. With the patterns we extract from tags and metadata, vision algorithms can be employed with greater precision. In particular, we demonstrate a location-tag-vision-based approach to retrieving images of geography-related landmarks and features from the Flickr dataset. The results suggest that community-contributed media and annotation can enhance and improve our access to multimedia resources – and our understanding of the world.},
  address              = {New York, NY, USA},
  author               = {Kennedy, Lyndon and Naaman, Mor and Ahern, Shane and Nair, Rahul and Rattenbury, Tye},
  booktitle            = {MULTIMEDIA '07: Proceedings of the 15th international conference on Multimedia},
  citeulike-article-id = {2626639},
  citeulike-linkout-0  = {http://portal.acm.org/citation.cfm?id=1291384},
  citeulike-linkout-1  = {http://dx.doi.org/10.1145/1291233.1291384},
  doi                  = {10.1145/1291233.1291384},
  file                 = {kennedy2007how.pdf:kennedy2007how.pdf:PDF},
  groups               = {public},
  interhash            = {cd4acdd5a627c20e9effdbda54dd122d},
  intrahash            = {7069480c43ba5d41396e075307cd1af1},
  isbn                 = {9781595937025},
  pages                = {631--640},
  posted-at            = {2009-06-25 14:41:53},
  priority             = {2},
  publisher            = {ACM},
  timestamp            = {2011-02-17 11:07:22},
  title                = {How {Flickr} helps us make sense of the world: context and content in community-contributed media collections},
  url                  = {http://dx.doi.org/10.1145/1291233.1291384},
  username             = {dbenz},
  year                 = 2007
}

@article{garciasilva2011review,
  abstract  = {This paper describes and compares the most relevant approaches for associating tags with semantics in order to make explicit the meaning of those tags. We identify a common set of steps that are usually considered across all these approaches and frame our descriptions according to them, providing a unified view of how each approach tackles the different problems that appear during the semantic association process. Furthermore, we provide some recommendations on (a) how and when to use each of the approaches according to the characteristics of the data source, and (b) how to improve results by leveraging the strengths of the different approaches.},
  author    = {Garcia-Silva, Andres and Corcho, Oscar and Alani, Harith and Gomez-Perez, Asuncion},
  file      = {garciasilva2011review.pdf:garciasilva2011review.pdf:PDF},
  groups    = {public},
  interhash = {ef913839d8ab1f3955a9d05c5ba2fadf},
  intrahash = {42f77eb846bdae1847ea70ca5ba6c9ec},
  journal   = {Knowledge Engineering Review},
  month     = dec,
  number    = 4,
  timestamp = {2011-02-15 03:13:28},
  title     = {Review of the state of the art: Discovering and Associating Semantics to Tags in Folksonomies},
  username  = {dbenz},
  volume    = 26,
  year      = 2011
}

@article{Hazman:30May2009:1744-2621:24,
  abstract  = {Ontologies play a vital role in many web- and internet-related applications. This work presents a system for accelerating the ontology building process via semi-automatically learning a hierarchal ontology given a set of domain-specific web documents and a set of seed concepts. The methods are tested with web documents in the domain of agriculture. The ontology is constructed through the use of two complementary approaches. The presented system has been used to build an ontology in the agricultural domain using a set of Arabic extension documents and evaluated against a modified version of the AGROVOC ontology.},
  author    = {Hazman, Maryam and El-Beltagy, Samhaa R. and Rafea, Ahmed},
  doi       = {10.1504/IJMSO.2009.026251},
  interhash = {fe27d687bcba91a7a6fe51eec9a2b87d},
  intrahash = {323c8bdedc8a4643232a498ac03d6407},
  journal   = {International Journal of Metadata, Semantics and Ontologies},
  pages     = {24--33},
  title     = {Ontology learning from domain specific web documents},
  url       = {http://www.ingentaconnect.com/content/ind/ijmso/2009/00000004/F0020001/art00003},
  volume    = 4,
  year      = 2009
}

@inproceedings{tesconi2008semantify,
  abstract  = {At present tagging is experimenting a great diffusion as the most adopted way to collaboratively classify resources over the Web. In this paper, after a detailed analysis of the attempts made to improve the organization and structure of tagging systems as well as the usefulness of this kind of social data, we propose and evaluate the Tag Disambiguation Algorithm, mining del.icio.us data. It allows to easily semantify the tags of the users of a tagging service: it automatically finds out for each tag the related concept of Wikipedia in order to describe Web resources through senses. On the basis of a set of evaluation tests, we analyze all the advantages of our sense-based way of tagging, proposing new methods to keep the set of users tags more consistent or to classify the tagged resources on the basis of Wikipedia categories, YAGO classes or Wordnet synsets. We discuss also how our semanitified social tagging data are strongly linked to DBPedia and the datasets of the Linked Data community.},
  author    = {Tesconi, Maurizio and Ronzano, Francesco and Marchetti, Andrea and Minutoli, Salvatore},
  booktitle = {Proceedings of the Workshop Social Data on the Web (SDoW2008)},
  crossref  = {CEUR-WS.org/Vol-405},
  file      = {tesconi2008semantify.pdf:tesconi2008semantify.pdf:PDF},
  groups    = {public},
  interhash = {0c1c96b41a0af8512c20a7d41504640f},
  intrahash = {dd698b5ee4d93496d11627cbe1615514},
  timestamp = {2009-09-27 15:57:13},
  title     = {Semantify {del.icio.us}: Automatically Turn your Tags into Senses},
  url       = {http://CEUR-WS.org/Vol-405/paper8.pdf},
  username  = {dbenz},
  year      = 2008
}

@inproceedings{hjelm2008multilingual,
  abstract  = {We present a system for taxonomy extraction, aimed at providing a taxonomic backbone in an ontology learning environment. We follow previous research in using hierarchical clustering based on distributional similarity of the terms in texts. We show that basing the clustering on a comparable corpus in four languages gives a considerable improvement in accuracy compared to using only the monolingual English texts. We also show that hierarchical k-means clustering increases the similarity to the original taxonomy, when compared with a bottom-up agglomerative clustering approach.},
  author    = {Hjelm, Hans and Buitelaar, Paul},
  booktitle = {ECAI},
  crossref  = {conf/ecai/2008},
  editor    = {Ghallab, Malik and Spyropoulos, Constantine D. and Fakotakis, Nikos and Avouris, Nikolaos M.},
  ee        = {http://dx.doi.org/10.3233/978-1-58603-891-5-288},
  file      = {hjelm2008multilingual.pdf:hjelm2008multilingual.pdf:PDF},
  groups    = {public},
  interhash = {21a658154fb1a02e773b7a678b15f9f4},
  intrahash = {813903a333a40ecf9a59ded552acb323},
  isbn      = {978-1-58603-891-5},
  pages     = {288--292},
  publisher = {IOS Press},
  series    = {Frontiers in Artificial Intelligence and Applications},
  timestamp = {2011-01-18 12:06:01},
  title     = {Multilingual Evidence Improves Clustering-based Taxonomy Extraction},
  url       = {http://www.ling.su.se/staff/hans/artiklar/ecai2008-hjelm-buitelaar.pdf},
  username  = {dbenz},
  volume    = 178,
  year      = 2008
}

@inproceedings{auer2007what,
  abstract  = {Wikis are established means for the collaborative authoring, versioning and publishing of textual articles. The Wikipedia project, for example, succeeded in creating the by far largest encyclopedia just on the basis of a wiki. Recently, several approaches have been proposed on how to extend wikis to allow the creation of structured and semantically enriched content. However, the means for creating semantically enriched structured content are already available and are, although unconsciously, even used by Wikipedia authors. In this article, we present a method for revealing this structured content by extracting information from template instances. We suggest ways to efficiently query the vast amount of extracted information (e.g. more than 8 million RDF statements for the English Wikipedia version alone), leading to astonishing query answering possibilities (such as for the title question). We analyze the quality of the extracted content, and propose strategies for quality improvements with just minor modifications of the wiki systems being currently used.},
  author    = {Auer, S{\"o}ren and Lehmann, Jens},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  booktitle = {ESWC},
  crossref  = {DBLP:conf/esws/2007},
  ee        = {http://dx.doi.org/10.1007/978-3-540-72667-8_36},
  file      = {auer2007what.pdf:auer2007what.pdf:PDF},
  groups    = {public},
  interhash = {2b70ab546da1b45f5350d3ff742c4288},
  intrahash = {b8e464b4a672530bf91c9189f17cca73},
  pages     = {503--517},
  timestamp = {2010-02-23 14:49:49},
  title     = {What Have {Innsbruck} and {Leipzig} in Common? Extracting Semantics from {Wiki} Content},
  url       = {http://www.springerlink.com/content/3131t21p634191n2/},
  username  = {dbenz},
  year      = 2007
}

@inproceedings{baezayates2007extracting,
  abstract  = {In this paper we study a large query log of more than twenty million queries with the goal of extracting the semantic relations that are implicitly captured in the actions of users submitting queries and clicking answers. Previous query log analyses were mostly done with just the queries and not the actions that followed after them. We first propose a novel way to represent queries in a vector space based on a graph derived from the query-click bipartite graph. We then analyze the graph produced by our query log, showing that it is less sparse than previous results suggested, and that almost all the measures of these graphs follow power laws, shedding some light on the searching user behavior as well as on the distribution of topics that people want in the Web. The representation we introduce allows to infer interesting semantic relationships between queries. Second, we provide an experimental analysis on the quality of these relations, showing that most of them are relevant. Finally we sketch an application that detects multitopical URLs.},
  address   = {New York, NY, USA},
  author    = {Baeza-Yates, Ricardo and Tiberi, Alessandro},
  booktitle = {KDD '07: Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining},
  doi       = {10.1145/1281192.1281204},
  file      = {baezayates2007extracting.pdf:baezayates2007extracting.pdf:PDF},
  groups    = {public},
  interhash = {26ca034be705abaf072835784f53d877},
  intrahash = {6e45b65feffd1545c6dca62bf4b8f53d},
  isbn      = {978-1-59593-609-7},
  location  = {San Jose, California, USA},
  pages     = {76--85},
  publisher = {ACM},
  timestamp = {2009-06-01 15:31:03},
  title     = {Extracting semantic relations from query logs},
  url       = {http://portal.acm.org/citation.cfm?id=1281192.1281204},
  username  = {dbenz},
  year      = 2007
}