@inproceedings{chrupala2010named, author = {Chrupala, Grzegorz and Klakow, Dietrich}, booktitle = {LREC}, crossref = {conf/lrec/2010}, editor = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Rosner, Mike and Tapias, Daniel}, ee = {http://www.lrec-conf.org/proceedings/lrec2010/summaries/538.html}, interhash = {85b8f5e04b66df3fe9411fc8f81ae43a}, intrahash = {68b98f37dc2dd0a89f580d9e6b65c780}, isbn = {2-9517408-6-7}, publisher = {European Language Resources Association}, title = {A Named Entity Labeler for German: Exploiting Wikipedia and Distributional Clusters.}, url = {http://lexitron.nectec.or.th/public/LREC-2010_Malta/pdf/538_Paper.pdf}, year = 2010 } @inproceedings{zesch2007analysis, abstract = {In this paper, we discuss two graphs in Wikipedia (i) the article graph, and (ii) the category graph. We perform a graph-theoretic analysis of the category graph, and show that it is a scale-free, small world graph like other well-known lexical semantic networks. We substantiate our findings by transferring semantic relatedness algorithms defined on WordNet to the Wikipedia category graph. To assess the usefulness of the category graph as an NLP resource, we analyze its coverage and the performance of the transferred semantic relatedness algorithms. }, address = {Rochester}, author = {Zesch, Torsten and Gurevych, Iryna}, booktitle = {Proceedings of the TextGraphs-2 Workshop (NAACL-HLT)}, interhash = {0401e62edb9bfa85dd498cb40301c0cb}, intrahash = {332ed720a72bf069275f93485432314b}, month = apr, pages = {1--8}, publisher = {Association for Computational Linguistics}, title = {Analysis of the Wikipedia Category Graph for NLP Applications}, url = {http://acl.ldc.upenn.edu/W/W07/W07-02.pdf#page=11}, year = 2007 } @inproceedings{mendes2011dbpedia, abstract = {Interlinking text documents with Linked Open Data enables the Web of Data to be used as background knowledge within document-oriented applications such as search and faceted browsing. As a step towards interconnecting the Web of Documents with the Web of Data, we developed DBpedia Spotlight, a system for automatically annotating text documents with DBpedia URIs. DBpedia Spotlight allows users to configure the annotations to their specific needs through the DBpedia Ontology and quality measures such as prominence, topical pertinence, contextual ambiguity and disambiguation confidence. We compare our approach with the state of the art in disambiguation, and evaluate our results in light of three baselines and six publicly available annotation systems, demonstrating the competitiveness of our system. DBpedia Spotlight is shared as open source and deployed as a Web Service freely available for public use.}, acmid = {2063519}, address = {New York, NY, USA}, author = {Mendes, Pablo N. 
and Jakob, Max and García-Silva, Andrés and Bizer, Christian}, booktitle = {Proceedings of the 7th International Conference on Semantic Systems}, doi = {10.1145/2063518.2063519}, interhash = {92df08698e5608afc6dc5b3e9be76880}, intrahash = {58fbb395741cce1d5370a6f205f24843}, isbn = {978-1-4503-0621-8}, location = {Graz, Austria}, numpages = {8}, pages = {1--8}, publisher = {ACM}, title = {DBpedia spotlight: shedding light on the web of documents}, url = {http://doi.acm.org/10.1145/2063518.2063519}, year = 2011 } @inproceedings{mihalcea2007wikify, abstract = {This paper introduces the use of Wikipedia as a resource for automatic keyword extraction and word sense disambiguation, and shows how this online encyclopedia can be used to achieve state-of-the-art results on both these tasks. The paper also shows how the two methods can be combined into a system able to automatically enrich a text with links to encyclopedic knowledge. Given an input document, the system identifies the important concepts in the text and automatically links these concepts to the corresponding Wikipedia pages. Evaluations of the system show that the automatic annotations are reliable and hardly distinguishable from manual annotations.}, acmid = {1321475}, address = {New York, NY, USA}, author = {Mihalcea, Rada and Csomai, Andras}, booktitle = {Proceedings of the sixteenth ACM Conference on information and knowledge management}, doi = {10.1145/1321440.1321475}, interhash = {8e00f4c1515b89a9a035c9d4b78d7bed}, intrahash = {4917a0c8eb1ea05b2d103166dfaeeb6e}, isbn = {978-1-59593-803-9}, location = {Lisbon, Portugal}, numpages = {10}, pages = {233--242}, publisher = {ACM}, title = {Wikify!: linking documents to encyclopedic knowledge}, url = {http://doi.acm.org/10.1145/1321440.1321475}, year = 2007 } @inproceedings{gunes2012eager, abstract = {Key to named entity recognition, the manual gazetteering of entity lists is a costly, error-prone process that often yields results that are incomplete and suffer from sampling bias. Exploiting current sources of structured information, we propose a novel method for extending minimal seed lists into complete gazetteers. Like previous approaches, we value WIKIPEDIA as a huge, well-curated, and relatively unbiased source of entities. However, in contrast to previous work, we exploit not only its content, but also its structure, as exposed in DBPEDIA. We extend gazetteers through Wikipedia categories, carefully limiting the impact of noisy categorizations. The resulting gazetteers easily outperform previous approaches on named entity recognition.}, author = {Gunes, Omer and Schallhart, Christian and Furche, Tim and Lehmann, Jens and Ngomo, Axel-Cyrille Ngonga}, booktitle = {Proceedings of the 3rd Workshop on the People's Web Meets NLP: Collaboratively Constructed Semantic Resources and their Applications to NLP}, interhash = {20c47a41c89ff6c2a8f7bb524185b8ac}, intrahash = {3eac4c009268cd4f2c264dd24053f8a6}, month = jul, organization = {Association for Computational Linguistics}, pages = {29--33}, title = {EAGER: extending automatically gazetteers for entity recognition}, url = {http://acl.eldoc.ub.rug.nl/mirror/W/W12/W12-4005.pdf}, year = 2012 } @inproceedings{takahashi2011evaluating, abstract = {We propose a method to evaluate the significance of historical entities (people, events, and so on). Here, the significance of a historical entity means how it affected other historical entities. Our proposed method first calculates the tempo-spatial impact of historical entities.
The impact of a historical entity varies according to time and location. Historical entities are collected from Wikipedia. We assume that a Wikipedia link between historical entities represents an impact propagation. That is, when an entity has a link to another entity, we regard the former as influenced by the latter. Historical entities in Wikipedia usually have the date and location of their occurrence. Our proposed iteration algorithm propagates such initial tempo-spatial information through links in a similar manner to PageRank, so the tempo-spatial impact scores of all the historical entities can be calculated. We assume that a historical entity is significant if it influences many other entities that are far from it temporally or geographically. We demonstrate a prototype system and show the results of experiments that prove the effectiveness of our method.}, acmid = {1995980}, address = {New York, NY, USA}, author = {Takahashi, Yuku and Ohshima, Hiroaki and Yamamoto, Mitsuo and Iwasaki, Hirotoshi and Oyama, Satoshi and Tanaka, Katsumi}, booktitle = {Proceedings of the 22nd ACM conference on Hypertext and hypermedia}, doi = {10.1145/1995966.1995980}, interhash = {6665836546bedb1ee5d56a4d16a0848e}, intrahash = {e4769d86e71c9e7ba77d5d4af6f21e0c}, isbn = {978-1-4503-0256-2}, location = {Eindhoven, The Netherlands}, numpages = {10}, pages = {83--92}, publisher = {ACM}, title = {Evaluating significance of historical entities based on tempo-spatial impacts analysis using Wikipedia link structure}, url = {http://doi.acm.org/10.1145/1995966.1995980}, year = 2011 } @inproceedings{ollivier2007finding, abstract = {We introduce a new method for finding nodes semantically related to a given node in a hyperlinked graph: the Green method, based on a classical Markov chain tool. It is generic, adjustment-free and easy to implement. We test it in the case of the hyperlink structure of the English version of Wikipedia, the on-line encyclopedia. We present an extensive comparative study of the performance of our method versus several other classical methods in the case of Wikipedia. The Green method is found to have both the best average results and the best robustness.}, acmid = {1619874}, author = {Ollivier, Yann and Senellart, Pierre}, booktitle = {Proceedings of the 22nd national conference on Artificial intelligence}, interhash = {a291b1b4e195dd09a11c8ffe329fc0e5}, intrahash = {76e219fe6e8a257b30c6665af8b273da}, isbn = {978-1-57735-323-2}, location = {Vancouver, British Columbia, Canada}, numpages = {7}, pages = {1427--1433}, publisher = {AAAI Press}, title = {Finding related pages using Green measures: an illustration with Wikipedia}, url = {http://dl.acm.org/citation.cfm?id=1619797.1619874}, volume = 2, year = 2007 } @incollection{auer2007dbpedia, abstract = {DBpedia is a community effort to extract structured information from Wikipedia and to make this information available on the Web. DBpedia allows you to ask sophisticated queries against datasets derived from Wikipedia and to link other datasets on the Web to Wikipedia data. We describe the extraction of the DBpedia datasets, and how the resulting information is published on the Web for human- and machine-consumption. We describe some emerging applications from the DBpedia community and show how website authors can facilitate DBpedia content within their sites.
Finally, we present the current status of interlinking DBpedia with other open datasets on the Web and outline how DBpedia could serve as a nucleus for an emerging Web of open data.}, address = {Berlin/Heidelberg}, author = {Auer, Sören and Bizer, Christian and Kobilarov, Georgi and Lehmann, Jens and Cyganiak, Richard and Ives, Zachary}, booktitle = {The Semantic Web}, doi = {10.1007/978-3-540-76298-0_52}, editor = {Aberer, Karl and Choi, Key-Sun and Noy, Natasha and Allemang, Dean and Lee, Kyung-Il and Nixon, Lyndon and Golbeck, Jennifer and Mika, Peter and Maynard, Diana and Mizoguchi, Riichiro and Schreiber, Guus and Cudré-Mauroux, Philippe}, interhash = {ba9f8a17de78f7864934ddb96afa67df}, intrahash = {b00f9f95ba1970164ad70aa227719c6e}, isbn = {978-3-540-76297-3}, pages = {722--735}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {DBpedia: A Nucleus for a Web of Open Data}, url = {http://dx.doi.org/10.1007/978-3-540-76298-0_52}, volume = 4825, year = 2007 } @inproceedings{derose2008building, abstract = {The rapid growth of Web communities has motivated many solutions for building community data portals. These solutions follow roughly two approaches. The first approach (e.g., Libra, Citeseer, Cimple) employs semi-automatic methods to extract and integrate data from a multitude of data sources. The second approach (e.g., Wikipedia, Intellipedia) deploys an initial portal in wiki format, then invites community members to revise and add material. In this paper we consider combining the above two approaches to building community portals. The new hybrid machine-human approach brings significant benefits. It can achieve broader and deeper coverage, provide more incentives for users to contribute, and keep the portal more up-to-date with less user effort. In a sense, it enables building "community wikipedias", backed by an underlying structured database that is continuously updated using automatic techniques. We outline our ideas for the new approach, describe its challenges and opportunities, and provide initial solutions. Finally, we describe a real-world implementation and preliminary experiments that demonstrate the utility of the new approach.}, author = {DeRose, P. and Chai, Xiaoyong and Gao, B.J. and Shen, W. and Doan, An Hai and Bohannon, P. and Zhu, Xiaojin}, booktitle = {24th International Conference on Data Engineering}, doi = {10.1109/ICDE.2008.4497473}, interhash = {00f45357225b1e75ed93bddb8d456fb7}, intrahash = {38a2e84d3dfd845d9c260d5f15161c6f}, month = apr, pages = {646--655}, publisher = {IEEE}, title = {Building Community Wikipedias: A Machine-Human Partnership Approach}, url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=4497473&tag=1}, year = 2008 } @inproceedings{milne2008effective, abstract = {This paper describes a new technique for obtaining measures of semantic relatedness. Like other recent approaches, it uses Wikipedia to provide structured world knowledge about the terms of interest. Our approach is unique in that it does so using the hyperlink structure of Wikipedia rather than its category hierarchy or textual content.
Evaluation with manually defined measures of semantic relatedness reveals this to be an effective compromise between the ease of computation of the former approach and the accuracy of the latter.}, author = {Milne, David and Witten, Ian H.}, booktitle = {Proceedings of the AAAI Workshop on Wikipedia and Artificial Intelligence: an Evolving Synergy}, interhash = {f8b0b3ba8f4a1c20e3d5d732a221f102}, intrahash = {f4daaa2541e89bdd1996c42f8f4b3a4b}, month = jul, pages = {25--30}, publisher = {AAAI Press}, title = {An effective, low-cost measure of semantic relatedness obtained from Wikipedia links}, url = {https://www.aaai.org/Papers/Workshops/2008/WS-08-15/WS08-15-005.pdf}, year = 2008 } @inproceedings{milne2008learning, abstract = {This paper describes how to automatically cross-reference documents with Wikipedia: the largest knowledge base ever known. It explains how machine learning can be used to identify significant terms within unstructured text, and enrich it with links to the appropriate Wikipedia articles. The resulting link detector and disambiguator performs very well, with recall and precision of almost 75%. This performance is constant whether the system is evaluated on Wikipedia articles or "real world" documents.

This work has implications far beyond enriching documents with explanatory links. It can provide structured knowledge about any unstructured fragment of text. Any task that is currently addressed with bags of words - indexing, clustering, retrieval, and summarization to name a few - could use the techniques described here to draw on a vast network of concepts and semantics.}, address = {New York, NY, USA}, author = {Milne, David and Witten, Ian H.}, booktitle = {Proceedings of the 17th ACM conference on Information and knowledge management}, doi = {10.1145/1458082.1458150}, interhash = {44159e289485110212602792e72bbd74}, intrahash = {fd9cd6bbf302731d5af3f6e748cdb359}, isbn = {978-1-59593-991-3}, location = {Napa Valley, California, USA}, pages = {509--518}, publisher = {ACM}, title = {Learning to link with wikipedia}, url = {http://doi.acm.org/10.1145/1458082.1458150}, year = 2008 } @inproceedings{stvilia2005assessing, abstract = {Effective information quality analysis needs powerful yet easy ways to obtain metrics. The English version of Wikipedia provides an extremely interesting yet challenging case for the study of Information Quality dynamics at both macro and micro levels. We propose seven {IQ} metrics which can be evaluated automatically and test the set on a representative sample of Wikipedia content. The methodology of the metrics construction and the results of tests, along with a number of statistical characterizations of Wikipedia articles, their content construction, process metadata and social context are reported.}, address = {Cambridge, MA}, author = {Stvilia, Besiki and Twidale, Michael B. and Smith, Linda C. and Gasser, Les}, booktitle = {Proceedings of the 2005 International Conference on Information Quality}, interhash = {b84acb7b890edee9c53c216b0faadbec}, intrahash = {33be94d76729286b2bfc3d31a98f88db}, pages = {442--454}, publisher = {MIT}, title = {Assessing information quality of a community-based encyclopedia}, year = 2005 } @inproceedings{hu2007measuring, abstract = {Wikipedia has grown to be the world's largest and busiest free encyclopedia, in which articles are collaboratively written and maintained by volunteers online. Despite its success as a means of knowledge sharing and collaboration, the public has never stopped criticizing the quality of Wikipedia articles edited by non-experts and inexperienced contributors. In this paper, we investigate the problem of assessing the quality of articles in collaborative authoring of Wikipedia. We propose three article quality measurement models that make use of the interaction data between articles and their contributors derived from the article edit history. Our Basic model is designed based on the mutual dependency between article quality and their author authority. The PeerReview model introduces the review behavior into measuring article quality. Finally, our ProbReview models extend PeerReview with partial reviewership of contributors as they edit various portions of the articles.
We conduct experiments on a set of well-labeled Wikipedia articles to evaluate the effectiveness of our quality measurement models in resembling human judgement.}, acmid = {1321476}, address = {New York, NY, USA}, author = {Hu, Meiqun and Lim, Ee-Peng and Sun, Aixin and Lauw, Hady Wirawan and Vuong, Ba-Quy}, booktitle = {Proceedings of the Sixteenth ACM Conference on Information and Knowledge Management}, doi = {10.1145/1321440.1321476}, interhash = {7fceff7d0b5943b21f66d970cfd65ccb}, intrahash = {cd9077443f7519e9cdce492858753632}, isbn = {978-1-59593-803-9}, location = {Lisbon, Portugal}, numpages = {10}, pages = {243--252}, publisher = {ACM}, series = {CIKM '07}, title = {Measuring article quality in wikipedia: models and evaluation}, url = {http://doi.acm.org/10.1145/1321440.1321476}, year = 2007 } @article{voss2006collaborative, abstract = {This paper explores the system of categories that is used to classify articles in Wikipedia. It is compared to collaborative tagging systems like del.icio.us and to hierarchical classification like the Dewey Decimal Classification (DDC). Specifics and commonalities of these systems of subject indexing are exposed. Analysis of structural and statistical properties (descriptors per record, records per descriptor, descriptor levels) shows that the category system of Wikimedia is a thesaurus that combines collaborative tagging and hierarchical subject indexing in a special way.}, author = {Voss, Jakob}, interhash = {7f47ede73627b6bd286a18325bc4d630}, intrahash = {bf3b43ebc016e2d340d0a5a893252a8b}, journal = {CoRR}, month = apr, title = {Collaborative thesaurus tagging the Wikipedia way}, url = {http://arxiv.org/abs/cs/0604036v2}, volume = {abs/cs/0604036}, year = 2006 } @inproceedings{nazir2008tripartite, abstract = {Social aspects are critical in the decision making process for social actors (human beings). Social aspects can be categorized into social interaction, social communities, social groups or any kind of behavior that emerges from interlinking, overlapping or similarities between interests of a society. These social aspects are dynamic and emergent. Therefore, interlinking them in a social structure, based on bipartite affiliation network, may result in isolated graphs. The major reason is that as these correspondences are dynamic and emergent, they should be coupled with more than a single affiliation in order to sustain the interconnections during interest evolutions. In this paper we propose to interlink actors using multiple tripartite graphs rather than a bipartite graph which was the focus of most of the previous social network building techniques. The utmost benefit of using tripartite graphs is that we can have multiple and hierarchical links between social actors. Therefore in this paper we discuss the extraction, plotting and analysis methods of tripartite relations between authors, articles and categories from Wikipedia. Furthermore, we also discuss the advantages of tripartite relationships over bipartite relationships. As a conclusion of this study we argue based on our results that to build useful, robust and dynamic social networks, actors should be interlinked in one or more tripartite networks.}, author = {Nazir, F.
and Takeda, H.}, booktitle = {IEEE International Symposium on Technology and Society}, doi = {10.1109/ISTAS.2008.4559785}, interhash = {7d3cb02c1c7774fe43e4303f0d3c37a4}, intrahash = {c3cca9801ab1e6d2598be1041c19618c}, isbn = {978-1-4244-1669-1}, month = jun, organization = {IEEE}, pages = {1--13}, title = {Extraction and analysis of tripartite relationships from Wikipedia}, url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=4559785}, year = 2008 } @book{stegbauer2009wikipedia, abstract = {In the Wikipedia encyclopedia project, hundreds of thousands of people cooperate while getting by with a minimum of coordination. The result is a public good that is often even superior to traditional reference works. Classical theories of cooperation, such as self-interest or cohesion through strong shared values, fail to explain why so many people contribute to Wikipedia. This book pursues that riddle of cooperation. Above all through network-analytic investigation, it shows that engagement is regulated by one's place in the positional system. One consequence is not only that some participants spend a great deal of time working on Wikipedia; without intending to, this also excludes a large share of potential helpers. These new explanations of volunteer work are also relevant to other areas of civic engagement.}, address = {Wiesbaden}, asin = {3531165895}, author = {Stegbauer, Christian}, ean = {9783531165899}, edition = 1, interhash = {f8766000bb2e6f319b8cdf0cfdc45719}, intrahash = {c8553408c125bab3475e656e70649574}, isbn = {3531165895}, publisher = {VS Verlag für Sozialwissenschaften}, title = {Wikipedia: Das Rätsel der Kooperation}, url = {http://www.amazon.de/Wikipedia-R%C3%A4tsel-Kooperation-Christian-Stegbauer/dp/3531165895%3FSubscriptionId%3D192BW6DQ43CK9FN0ZGG2%26tag%3Dws%26linkCode%3Dxm2%26camp%3D2025%26creative%3D165953%26creativeASIN%3D3531165895}, year = 2009 } @inproceedings{Hepp:2006:HWC, abstract = {One major obstacle towards adding machine-readable annotation to existing Web content is the lack of domain ontologies. While FOAF and Dublin Core are popular means for expressing relationships between Web resources and between Web resources and literal values, we widely lack unique identifiers for common concepts and instances. Also, most available ontologies have a very weak community grounding in the sense that they are designed by single individuals or small groups of individuals, while the majority of potential users is not involved in the process of proposing new ontology elements or achieving consensus. This is in sharp contrast to natural language where the evolution of the vocabulary is under the control of the user community. At the same time, we can observe that, within Wiki communities, especially Wikipedia, a large number of users is able to create comprehensive domain representations in the sense of unique, machine-feasible, identifiers and concept definitions which are sufficient for humans to grasp the intension of the concepts. The English version of Wikipedia contains now more than one million entries and thus the same amount of URIs plus a human-readable description. While this collection is on the lower end of ontology expressiveness, it is likely the largest living ontology that is available today.
In this paper, we (1) show that standard Wiki technology can be easily used as an ontology development environment for named classes, reducing entry barriers for the participation of users in the creation and maintenance of lightweight ontologies, (2) prove that the URIs of Wikipedia entries are surprisingly reliable identifiers for ontology concepts, and (3) demonstrate the applicability of our approach in a use case.}, author = {Hepp, Martin and Bachlechner, Daniel and Siorpaes, Katharina}, booktitle = {Proceedings of the First Workshop on Semantic Wikis -- From Wiki To Semantics}, crossref = {SemWiki2006-proceedings}, editor = {V\"{o}lkel, Max and Schaffert, Sebastian}, interhash = {640e549f082756c3272467c9df64307e}, intrahash = {d120c5c498e9cec5ce2934537c58978f}, month = jun, owner = {voelkel}, publisher = {ESWC2006}, series = {Workshop on Semantic Wikis}, timestamp = {2006.06.14}, title = {Harvesting Wiki Consensus - Using Wikipedia Entries as Ontology Elements}, url = {http://semwiki.org/semwiki2006}, year = 2006 }