@article{journals/nle/ZeschG10,
  author  = {Zesch, Torsten and Gurevych, Iryna},
  title   = {Wisdom of crowds versus wisdom of linguists -- measuring the semantic relatedness of words},
  journal = {Natural Language Engineering},
  volume  = 16,
  number  = 1,
  pages   = {25--59},
  year    = 2010,
  doi     = {10.1017/S1351324909990167},
  url     = {http://dblp.uni-trier.de/db/journals/nle/nle16.html#ZeschG10}
}

@inproceedings{mitchell2015,
  author    = {Mitchell, T. and Cohen, W. and Hruschka, E. and Talukdar, P. and Betteridge, J. and Carlson, A. and Dalvi, B. and Gardner, M. and Kisiel, B. and Krishnamurthy, J. and Lao, N. and Mazaitis, K. and Mohamed, T. and Nakashole, N. and Platanios, E. and Ritter, A. and Samadi, M. and Settles, B. and Wang, R. and Wijaya, D. and Gupta, A. and Chen, X. and Saparov, A. and Greaves, M. and Welling, J.},
  title     = {Never-Ending Learning},
  booktitle = {Proceedings of the 29th AAAI Conference on Artificial Intelligence (AAAI 2015)},
  year      = 2015,
  url       = {http://www.cs.cmu.edu/~wcohen/pubs.html}
}

@misc{singer2014hyptrails,
  author   = {Singer, Philipp and Helic, Denis and Hotho, Andreas and Strohmaier, Markus},
  title    = {HypTrails: A Bayesian Approach for Comparing Hypotheses about Human Trails on the Web},
  year     = 2014,
  note     = {arXiv:1411.2844},
  url      = {http://arxiv.org/abs/1411.2844},
  abstract = {When users interact with the Web today, they leave sequential digital trails on a massive scale. Examples of such human trails include Web navigation, sequences of online restaurant reviews, or online music play lists. Understanding the factors that drive the production of these trails can be useful for, e.g., improving underlying network structures, predicting user clicks or enhancing recommendations. In this work, we present a general approach called HypTrails for comparing a set of hypotheses about human trails on the Web, where hypotheses represent beliefs about transitions between states. Our approach utilizes Markov chain models with Bayesian inference. The main idea is to incorporate hypotheses as informative Dirichlet priors and to leverage the sensitivity of Bayes factors on the prior for comparing hypotheses with each other. For eliciting Dirichlet priors from hypotheses, we present an adaption of the so-called (trial) roulette method. We demonstrate the general mechanics and applicability of HypTrails by performing experiments with (i) synthetic trails for which we control the mechanisms that have produced them and (ii) empirical trails stemming from different domains including website navigation, business reviews and online music played. Our work expands the repertoire of methods available for studying human trails on the Web.}
}

@article{singer2013computing,
  author    = {Singer, Philipp and Niebler, Thomas and Strohmaier, Markus and Hotho, Andreas},
  title     = {Computing Semantic Relatedness from Human Navigational Paths: A Case Study on Wikipedia},
  journal   = {International Journal on Semantic Web and Information Systems (IJSWIS)},
  volume    = 9,
  number    = 4,
  pages     = {41--70},
  year      = 2013,
  publisher = {IGI Global},
  issn      = {15526283},
  doi       = {10.4018/ijswis.2013100103},
  url       = {http://services.igi-global.com/resolvedoi/resolve.aspx?doi=10.4018/ijswis.2013100103},
  abstract  = {In this article, the authors present a novel approach for computing semantic relatedness and conduct a large-scale study of it on Wikipedia. Unlike existing semantic analysis methods that utilize Wikipedia’s content or link structure, the authors propose to use human navigational paths on Wikipedia for this task. The authors obtain 1.8 million human navigational paths from a semi-controlled navigation experiment – a Wikipedia-based navigation game, in which users are required to find short paths between two articles in a given Wikipedia article network. The authors’ results are intriguing: They suggest that (i) semantic relatedness computed from human navigational paths may be more precise than semantic relatedness computed from Wikipedia’s plain link structure alone and (ii) that not all navigational paths are equally useful. Intelligent selection based on path characteristics can improve accuracy. The authors’ work makes an argument for expanding the existing arsenal of data sources for calculating semantic relatedness and to consider the utility of human navigational paths for this task.}
}

@incollection{MASH:13,
  author    = {Mitzlaff, Folke and Atzmueller, Martin and Stumme, Gerd and Hotho, Andreas},
  title     = {Semantics of User Interaction in Social Media},
  booktitle = {Complex Networks IV},
  editor    = {Ghoshal, Gourab and Poncela-Casasnovas, Julia and Tolksdorf, Robert},
  series    = {Studies in Computational Intelligence},
  volume    = 476,
  publisher = {Springer Verlag},
  address   = {Heidelberg, Germany},
  year      = 2013,
  isbn      = {978-3-642-36843-1},
  doi       = {10.1007/978-3-642-36844-8_2}
}

@book{antoniou2008semantic,
  author    = {Antoniou, Grigoris and van Harmelen, Frank},
  title     = {A Semantic Web Primer},
  publisher = {MIT Press},
  address   = {Cambridge, Mass.},
  year      = 2008,
  isbn      = {9780262255639},
  url       = {http://www.ics.forth.gr/isl/swprimer/}
}

@article{DBLP:journals/jise/YangK13,
  author  = {Yang, Wen-Teng and Kao, Hung-Yu},
  title   = {Measuring Semantic Relatedness using Wikipedia Signed Network},
  journal = {Journal of Information Science and Engineering},
  volume  = 29,
  number  = 4,
  pages   = {615--630},
  year    = 2013,
  url     = {http://www.iis.sinica.edu.tw/page/jise/2013/201307_01.html}
}
@misc{haslhofer2013semantic,
  author   = {Haslhofer, Bernhard and Robitza, Werner and Lagoze, Carl and Guimbretiere, Francois},
  title    = {Semantic Tagging on Historical Maps},
  year     = 2013,
  note     = {arXiv:1304.1636, 10 pages},
  url      = {http://arxiv.org/abs/1304.1636},
  abstract = {Tags assigned by users to shared content can be ambiguous. As a possible solution, we propose semantic tagging as a collaborative process in which a user selects and associates Web resources drawn from a knowledge context. We applied this general technique in the specific context of online historical maps and allowed users to annotate and tag them. To study the effects of semantic tagging on tag production, the types and categories of obtained tags, and user task load, we conducted an in-lab within-subject experiment with 24 participants who annotated and tagged two distinct maps. We found that the semantic tagging implementation does not affect these parameters, while providing tagging relationships to well-defined concept definitions. Compared to label-based tagging, our technique also gathers positive and negative tagging relationships. We believe that our findings carry implications for designers who want to adopt semantic tagging in other contexts and systems on the Web.}
}

@inproceedings{Kaur:2005:CLW:1054972.1054980,
  author    = {Kaur, Ishwinder and Hornof, Anthony J.},
  title     = {A Comparison of LSA, WordNet and PMI-IR for Predicting User Click Behavior},
  booktitle = {Proceedings of the SIGCHI Conference on Human Factors in Computing Systems},
  series    = {CHI '05},
  location  = {Portland, Oregon, USA},
  pages     = {51--60},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = 2005,
  isbn      = {1-58113-998-5},
  doi       = {10.1145/1054972.1054980},
  url       = {http://doi.acm.org/10.1145/1054972.1054980},
  abstract  = {A predictive tool to simulate human visual search behavior would help interface designers inform and validate their design. Such a tool would benefit from a semantic component that would help predict search behavior even in the absence of exact textual matches between goal and target. This paper discusses a comparison of three semantic systems (LSA, WordNet and PMI-IR) to evaluate their performance in predicting the link that people would select given an information goal and a webpage. PMI-IR best predicted human performance as observed in a user study.}
}
@incollection{tagging-cattuto,
  author    = {Cattuto, Ciro and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  title     = {Semantic Grounding of Tag Relatedness in Social Bookmarking Systems},
  booktitle = {The Semantic Web -- ISWC 2008},
  editor    = {Sheth, Amit and Staab, Steffen and Dean, Mike and Paolucci, Massimo and Maynard, Diana and Finin, Timothy and Thirunarayan, Krishnaprasad},
  series    = {Lecture Notes in Computer Science},
  volume    = 5318,
  pages     = {615--631},
  publisher = {Springer Berlin / Heidelberg},
  year      = 2008,
  doi       = {10.1007/978-3-540-88564-1_39},
  url       = {http://tagora-project.eu/wp-content/2009/09/cattuto_iswc2008.pdf},
  abstract  = {Collaborative tagging systems have nowadays become important data sources for populating semantic web applications. For tasks like synonym detection and discovery of concept hierarchies, many researchers introduced measures of tag similarity. Even though most of these measures appear very natural, their design often seems to be rather ad hoc, and the underlying assumptions on the notion of similarity are not made explicit. A more systematic characterization and validation of tag similarity in terms of formal representations of knowledge is still lacking. Here we address this issue and analyze several measures of tag similarity: Each measure is computed on data from the social bookmarking system del.icio.us and a semantic grounding is provided by mapping pairs of similar tags in the folksonomy to pairs of synsets in Wordnet, where we use validated measures of semantic distance to characterize the semantic relation between the mapped tags. This exposes important features of the investigated similarity measures and indicates which ones are better suited in the context of a given semantic application.}
}

@techreport{prudhommeaux2008sparql,
  author      = {Prud'hommeaux, Eric and Seaborne, Andy},
  title       = {SPARQL Query Language for RDF},
  institution = {W3C},
  type        = {W3C Recommendation},
  month       = jan,
  year        = 2008,
  url         = {http://www.w3.org/TR/rdf-sparql-query/},
  abstract    = {RDF is a directed, labeled graph data format for representing information in the Web. This specification defines the syntax and semantics of the SPARQL query language for RDF. SPARQL can be used to express queries across diverse data sources, whether the data is stored natively as RDF or viewed as RDF via middleware. SPARQL contains capabilities for querying required and optional graph patterns along with their conjunctions and disjunctions. SPARQL also supports extensible value testing and constraining queries by source RDF graph. The results of SPARQL queries can be results sets or RDF graphs.}
}
@article{bizer2009dbpedia,
  author   = {Bizer, Christian and Lehmann, Jens and Kobilarov, Georgi and Auer, Sören and Becker, Christian and Cyganiak, Richard and Hellmann, Sebastian},
  title    = {DBpedia -- A crystallization point for the Web of Data},
  journal  = {Web Semantics: Science, Services and Agents on the World Wide Web},
  volume   = 7,
  number   = 3,
  pages    = {154--165},
  year     = 2009,
  issn     = {1570-8268},
  doi      = {10.1016/j.websem.2009.07.002},
  url      = {http://www.sciencedirect.com/science/article/pii/S1570826809000225},
  abstract = {The DBpedia project is a community effort to extract structured information from Wikipedia and to make this information accessible on the Web. The resulting DBpedia knowledge base currently describes over 2.6 million entities. For each of these entities, DBpedia defines a globally unique identifier that can be dereferenced over the Web into a rich RDF description of the entity, including human-readable definitions in 30 languages, relationships to other resources, classifications in four concept hierarchies, various facts as well as data-level links to other Web data sources describing the entity. Over the last year, an increasing number of data publishers have begun to set data-level links to DBpedia resources, making DBpedia a central interlinking hub for the emerging Web of Data. Currently, the Web of interlinked data sources around DBpedia provides approximately 4.7 billion pieces of information and covers domains such as geographic information, people, companies, films, music, genes, drugs, books, and scientific publications. This article describes the extraction of the DBpedia knowledge base, the current status of interlinking DBpedia with other data sources on the Web, and gives an overview of applications that facilitate the Web of Data around DBpedia.}
}

@inproceedings{suchanek2007semantic,
  author    = {Suchanek, Fabian M. and Kasneci, Gjergji and Weikum, Gerhard},
  title     = {YAGO: a core of semantic knowledge},
  booktitle = {Proceedings of the 16th International Conference on World Wide Web},
  location  = {Banff, Alberta, Canada},
  pages     = {697--706},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = 2007,
  isbn      = {978-1-59593-654-7},
  doi       = {10.1145/1242572.1242667},
  url       = {http://doi.acm.org/10.1145/1242572.1242667},
  abstract  = {We present YAGO, a light-weight and extensible ontology with high coverage and quality. YAGO builds on entities and relations and currently contains more than 1 million entities and 5 million facts. This includes the Is-A hierarchy as well as non-taxonomic relations between entities (such as HASWONPRIZE). The facts have been automatically extracted from Wikipedia and unified with WordNet, using a carefully designed combination of rule-based and heuristic methods described in this paper. The resulting knowledge base is a major step beyond WordNet: in quality by adding knowledge about individuals like persons, organizations, products, etc. with their semantic relationships - and in quantity by increasing the number of facts by more than an order of magnitude. Our empirical evaluation of fact correctness shows an accuracy of about 95%. YAGO is based on a logically clean model, which is decidable, extensible, and compatible with RDFS. Finally, we show how YAGO can be further extended by state-of-the-art information extraction techniques.}
}
@incollection{auer2007dbpedia,
  author    = {Auer, Sören and Bizer, Christian and Kobilarov, Georgi and Lehmann, Jens and Cyganiak, Richard and Ives, Zachary},
  title     = {DBpedia: A Nucleus for a Web of Open Data},
  booktitle = {The Semantic Web},
  editor    = {Aberer, Karl and Choi, Key-Sun and Noy, Natasha and Allemang, Dean and Lee, Kyung-Il and Nixon, Lyndon and Golbeck, Jennifer and Mika, Peter and Maynard, Diana and Mizoguchi, Riichiro and Schreiber, Guus and Cudré-Mauroux, Philippe},
  series    = {Lecture Notes in Computer Science},
  volume    = 4825,
  pages     = {722--735},
  publisher = {Springer},
  address   = {Berlin/Heidelberg},
  year      = 2007,
  isbn      = {978-3-540-76297-3},
  doi       = {10.1007/978-3-540-76298-0_52},
  url       = {http://dx.doi.org/10.1007/978-3-540-76298-0_52},
  abstract  = {DBpedia is a community effort to extract structured information from Wikipedia and to make this information available on the Web. DBpedia allows you to ask sophisticated queries against datasets derived from Wikipedia and to link other datasets on the Web to Wikipedia data. We describe the extraction of the DBpedia datasets, and how the resulting information is published on the Web for human- and machine-consumption. We describe some emerging applications from the DBpedia community and show how website authors can facilitate DBpedia content within their sites. Finally, we present the current status of interlinking DBpedia with other open datasets on the Web and outline how DBpedia could serve as a nucleus for an emerging Web of open data.}
}

@article{horrocks2003making,
  author   = {Horrocks, Ian and Patel-Schneider, Peter F. and van Harmelen, Frank},
  title    = {From SHIQ and RDF to OWL: the making of a Web Ontology Language},
  journal  = {Web Semantics: Science, Services and Agents on the World Wide Web},
  volume   = 1,
  number   = 1,
  pages    = {7--26},
  year     = 2003,
  issn     = {1570-8268},
  doi      = {10.1016/j.websem.2003.07.001},
  url      = {http://www.sciencedirect.com/science/article/pii/S1570826803000027},
  abstract = {The OWL Web Ontology Language is a new formal language for representing ontologies in the Semantic Web. OWL has features from several families of representation languages, including primarily Description Logics and frames. OWL also shares many characteristics with RDF, the W3C base of the Semantic Web. In this paper, we discuss how the philosophy and features of OWL can be traced back to these older formalisms, with modifications driven by several other constraints on OWL. Several interesting problems have arisen where these influences on OWL have clashed.}
}
@inproceedings{Yeh:2009:WRW:1708124.1708133,
  author    = {Yeh, Eric and Ramage, Daniel and Manning, Christopher D. and Agirre, Eneko and Soroa, Aitor},
  title     = {WikiWalk: random walks on Wikipedia for semantic relatedness},
  booktitle = {Proceedings of the 2009 Workshop on Graph-based Methods for Natural Language Processing},
  series    = {TextGraphs-4},
  location  = {Suntec, Singapore},
  pages     = {41--49},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  year      = 2009,
  isbn      = {978-1-932432-54-1},
  url       = {http://dl.acm.org/citation.cfm?id=1708124.1708133},
  abstract  = {Computing semantic relatedness of natural language texts is a key component of tasks such as information retrieval and summarization, and often depends on knowledge of a broad range of real-world concepts and relationships. We address this knowledge integration issue by computing semantic relatedness using personalized PageRank (random walks) on a graph derived from Wikipedia. This paper evaluates methods for building the graph, including link selection strategies, and two methods for representing input texts as distributions over the graph nodes: one based on a dictionary lookup, the other based on Explicit Semantic Analysis. We evaluate our techniques on standard word relatedness and text similarity datasets, finding that they capture similarity information complementary to existing Wikipedia-based relatedness measures, resulting in small improvements on a state-of-the-art measure.}
}

@inproceedings{West2009,
  author    = {West, Robert and Pineau, Joelle and Precup, Doina},
  title     = {Wikispeedia: an online game for inferring semantic distances between concepts},
  booktitle = {Proceedings of the 21st International Joint Conference on Artificial Intelligence},
  series    = {IJCAI'09},
  location  = {Pasadena, California, USA},
  pages     = {1598--1603},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  year      = 2009,
  url       = {http://dl.acm.org/citation.cfm?id=1661445.1661702},
  abstract  = {Computing the semantic distance between real-world concepts is crucial for many intelligent applications. We present a novel method that leverages data from 'Wikispeedia', an online game played on Wikipedia; players have to reach an article from another, unrelated article, only by clicking links in the articles encountered. In order to automatically infer semantic distances between everyday concepts, our method effectively extracts the common sense displayed by humans during play, and is thus more desirable, from a cognitive point of view, than purely corpus-based methods. We show that our method significantly outperforms Latent Semantic Analysis in a psychometric evaluation of the quality of learned semantic distances.}
}

@inproceedings{martins2008extracting,
  author    = {Martins, B. and Manguinhas, H. and Borbinha, J.},
  title     = {Extracting and Exploring the Geo-Temporal Semantics of Textual Resources},
  booktitle = {Proceedings of the International Conference on Semantic Computing},
  month     = aug,
  pages     = {1--9},
  publisher = {IEEE Computer Society},
  year      = 2008,
  doi       = {10.1109/ICSC.2008.86},
  url       = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=4597167},
  abstract  = {Geo-temporal criteria are important for filtering, grouping and prioritizing information resources. This paper presents techniques for extracting semantic geo-temporal information from text, using simple text mining methods that leverage a gazetteer. A prototype system, implementing the proposed methods and capable of displaying information over maps and timelines, is described. This prototype can take input in RSS, demonstrating the application to content from many different online sources. Experimental results demonstrate the efficiency and accuracy of the proposed approaches.}
}
@article{goodwin2008geographical,
  author    = {Goodwin, John and Dolbear, Catherine and Hart, Glen},
  title     = {Geographical Linked Data: The Administrative Geography of Great Britain on the Semantic Web},
  journal   = {Transactions in GIS},
  volume    = 12,
  pages     = {19--30},
  publisher = {Blackwell Publishing Ltd},
  year      = 2008,
  issn      = {1467-9671},
  doi       = {10.1111/j.1467-9671.2008.01133.x},
  url       = {http://dx.doi.org/10.1111/j.1467-9671.2008.01133.x},
  abstract  = {Ordnance Survey, the national mapping agency of Great Britain, is investigating how semantic web technologies assist its role as a geographical information provider. A major part of this work involves the development of prototype products and datasets in RDF. This article discusses the production of an example dataset for the administrative geography of Great Britain, demonstrating the advantages of explicitly encoding topological relations between geographic entities over traditional spatial queries. We also outline how these data can be linked to other datasets on the web of linked data and some of the challenges that this raises.}
}

@inproceedings{jeffery2008payasyougo,
  author    = {Jeffery, Shawn R. and Franklin, Michael J. and Halevy, Alon Y.},
  title     = {Pay-as-you-go user feedback for dataspace systems},
  booktitle = {Proceedings of the 2008 ACM SIGMOD International Conference on Management of Data},
  location  = {Vancouver, Canada},
  pages     = {847--860},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = 2008,
  isbn      = {978-1-60558-102-6},
  doi       = {10.1145/1376616.1376701},
  url       = {http://doi.acm.org/10.1145/1376616.1376701},
  abstract  = {A primary challenge to large-scale data integration is creating semantic equivalences between elements from different data sources that correspond to the same real-world entity or concept. Dataspaces propose a pay-as-you-go approach: automated mechanisms such as schema matching and reference reconciliation provide initial correspondences, termed candidate matches, and then user feedback is used to incrementally confirm these matches. The key to this approach is to determine in what order to solicit user feedback for confirming candidate matches. In this paper, we develop a decision-theoretic framework for ordering candidate matches for user confirmation using the concept of the value of perfect information (VPI). At the core of this concept is a utility function that quantifies the desirability of a given state; thus, we devise a utility function for dataspaces based on query result quality. We show in practice how to efficiently apply VPI in concert with this utility function to order user confirmations. A detailed experimental evaluation on both real and synthetic datasets shows that the ordering of user feedback produced by this VPI-based approach yields a dataspace with a significantly higher utility than a wide range of other ordering strategies. Finally, we outline the design of Roomba, a system that utilizes this decision-theoretic framework to guide a dataspace in soliciting user feedback in a pay-as-you-go manner.}
}