% Bibliography section: named entity recognition, Wikipedia-based linking,
% and entity / author-name disambiguation.
% Acronyms and proper nouns in titles are brace-protected so that styles
% applying sentence casing keep their capitalisation.

% LREC 2010 paper on German named entity labeling.
% NOTE(review): the crossref parent "conf/lrec/2010" is not visible in this
% section; classic BibTeX requires it to appear after this entry -- confirm.
@inproceedings{chrupala2010named,
  author    = {Chrupala, Grzegorz and Klakow, Dietrich},
  booktitle = {LREC},
  crossref  = {conf/lrec/2010},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Rosner, Mike and Tapias, Daniel},
  ee        = {http://www.lrec-conf.org/proceedings/lrec2010/summaries/538.html},
  interhash = {85b8f5e04b66df3fe9411fc8f81ae43a},
  intrahash = {68b98f37dc2dd0a89f580d9e6b65c780},
  isbn      = {2-9517408-6-7},
  publisher = {European Language Resources Association},
  title     = {A Named Entity Labeler for {German}: Exploiting {Wikipedia} and Distributional Clusters},
  url       = {http://lexitron.nectec.or.th/public/LREC-2010_Malta/pdf/538_Paper.pdf},
  year      = 2010
}

% Conference paper on linking documents to Wikipedia pages (keyword extraction
% plus word sense disambiguation).
@inproceedings{mihalcea2007wikify,
  abstract  = {This paper introduces the use of Wikipedia as a resource for automatic keyword
               extraction and word sense disambiguation, and shows how this online encyclopedia
               can be used to achieve state-of-the-art results on both these tasks. The paper
               also shows how the two methods can be combined into a system able to
               automatically enrich a text with links to encyclopedic knowledge. Given an input
               document, the system identifies the important concepts in the text and
               automatically links these concepts to the corresponding Wikipedia pages.
               Evaluations of the system show that the automatic annotations are reliable and
               hardly distinguishable from manual annotations.},
  acmid     = {1321475},
  address   = {New York, NY, USA},
  author    = {Mihalcea, Rada and Csomai, Andras},
  booktitle = {Proceedings of the sixteenth ACM Conference on information and knowledge management},
  doi       = {10.1145/1321440.1321475},
  interhash = {8e00f4c1515b89a9a035c9d4b78d7bed},
  intrahash = {4917a0c8eb1ea05b2d103166dfaeeb6e},
  isbn      = {978-1-59593-803-9},
  location  = {Lisbon, Portugal},
  numpages  = {10},
  pages     = {233--242},
  publisher = {ACM},
  title     = {Wikify!: linking documents to encyclopedic knowledge},
  url       = {http://doi.acm.org/10.1145/1321440.1321475},
  year      = 2007
}

% Workshop paper on extending seed lists into gazetteers for entity recognition.
@inproceedings{gunes2012eager,
  abstract     = {Key to named entity recognition, the manual gazetteering of entity lists is a
                  costly, error-prone process that often yields results that are incomplete and
                  suffer from sampling bias. Exploiting current sources of structured
                  information, we propose a novel method for extending minimal seed lists into
                  complete gazetteers. Like previous approaches, we value WIKIPEDIA as a huge,
                  well-curated, and relatively unbiased source of entities. However, in contrast
                  to previous work, we exploit not only its content, but also its structure, as
                  exposed in DBPEDIA. We extend gazetteers through Wikipedia categories,
                  carefully limiting the impact of noisy categorizations. The resulting
                  gazetteers easily outperform previous approaches on named entity recognition.},
  author       = {Gunes, Omer and Schallhart, Christian and Furche, Tim and Lehmann, Jens and Ngomo, Axel-Cyrille Ngonga},
  booktitle    = {Proceedings of the 3rd Workshop on the People's Web Meets NLP: Collaboratively Constructed Semantic Resources and their Applications to NLP},
  interhash    = {20c47a41c89ff6c2a8f7bb524185b8ac},
  intrahash    = {3eac4c009268cd4f2c264dd24053f8a6},
  month        = jul,
  organization = {Association for Computational Linguistics},
  pages        = {29--33},
  title        = {{EAGER}: extending automatically gazetteers for entity recognition},
  url          = {http://acl.eldoc.ub.rug.nl/mirror/W/W12/W12-4005.pdf},
  year         = 2012
}

% Journal article reviewing the evolution and data modeling of DBLP.
@article{ley2009lessons,
  abstract   = {The DBLP Computer Science Bibliography evolved from an early small experimental
                Web server to a popular service for the computer science community. Many design
                decisions and details of the public XML-records behind DBLP never were
                documented. This paper is a review of the evolution of DBLP. The main
                perspective is data modeling. In DBLP persons play a central role, our
                discussion of person names may be applicable to many other data bases. All DBLP
                data are available for your own experiments. You may either download the
                complete set, or use a simple XML-based API described in an online appendix.},
  acmid      = {1687577},
  author     = {Ley, Michael},
  interhash  = {a75ae2987d55512b7d0731c7a11a1722},
  intrahash  = {bb968ff4ba9ae93bc80ba05d16a98ff4},
  issn       = {2150-8097},
  issue_date = {August 2009},
  journal    = {Proceedings of the VLDB Endowment},
  month      = aug,
  number     = 2,
  numpages   = {8},
  pages      = {1493--1500},
  publisher  = {VLDB Endowment},
  title      = {{DBLP}: some lessons learned},
  url        = {http://dl.acm.org/citation.cfm?id=1687553.1687577},
  volume     = 2,
  year       = 2009
}

% Journal article on collective (relational) entity resolution.
% The entry carries an article number (articleno) and no pages field.
@article{bhattacharya2007collective,
  abstract   = {Many databases contain uncertain and imprecise references to real-world
                entities. The absence of identifiers for the underlying entities often results
                in a database which contains multiple references to the same entity. This can
                lead not only to data redundancy, but also inaccuracies in query processing and
                knowledge extraction. These problems can be alleviated through the use of entity
                resolution. Entity resolution involves discovering the underlying entities and
                mapping each database reference to these entities. Traditionally, entities are
                resolved using pairwise similarity over the attributes of references. However,
                there is often additional relational information in the data. Specifically,
                references to different entities may cooccur. In these cases, collective entity
                resolution, in which entities for cooccurring references are determined jointly
                rather than independently, can improve entity resolution accuracy. We propose a
                novel relational clustering algorithm that uses both attribute and relational
                information for determining the underlying domain entities, and we give an
                efficient implementation. We investigate the impact that different relational
                similarity measures have on entity resolution quality. We evaluate our
                collective entity resolution algorithm on multiple real-world databases. We show
                that it improves entity resolution performance over both attribute-based
                baselines and over algorithms that consider relational information but do not
                resolve entities collectively. In addition, we perform detailed experiments on
                synthetically generated data to identify data characteristics that favor
                collective relational resolution over purely attribute-based algorithms.},
  acmid      = {1217304},
  address    = {New York, NY, USA},
  articleno  = {5},
  author     = {Bhattacharya, Indrajit and Getoor, Lise},
  doi        = {10.1145/1217299.1217304},
  interhash  = {3fdd3dfe026b0f18c7b9927ebe471cf1},
  intrahash  = {5c65a3d97ac6933ca2f63480630d99cf},
  issn       = {1556-4681},
  issue      = {1},
  issue_date = {March 2007},
  journal    = {ACM Transactions on Knowledge Discovery from Data},
  month      = mar,
  number     = 1,
  publisher  = {ACM},
  title      = {Collective entity resolution in relational data},
  url        = {http://doi.acm.org/10.1145/1217299.1217304},
  volume     = 1,
  year       = 2007
}

% Journal article on co-authorship as a feature for author name disambiguation.
% The ampersand in the journal name is escaped so LaTeX does not read it as an
% alignment tab.
@article{kang2009coauthorship,
  abstract  = {Author name disambiguation deals with clustering the same-name authors into
               different individuals. To attack the problem, many studies have employed a
               variety of disambiguation features such as coauthors, titles of
               papers/publications, topics of articles, emails/affiliations, etc. Among these,
               co-authorship is the most easily accessible and influential, since inter-person
               acquaintances represented by co-authorship could discriminate the identities of
               authors more clearly than other features. This study attempts to explore the net
               effects of co-authorship on author clustering in bibliographic data. First, to
               handle the shortage of explicit coauthors listed in known citations, a
               web-assisted technique of acquiring implicit coauthors of the target author to be
               disambiguated is proposed. Then, a coauthor disambiguation hypothesis that the
               identity of an author can be determined by his/her coauthors is examined and
               confirmed through a variety of author disambiguation experiments.},
  author    = {Kang, In-Su and Na, Seung-Hoon and Lee, Seungwoo and Jung, Hanmin and Kim, Pyung and Sung, Won-Kyung and Lee, Jong-Hyeok},
  doi       = {10.1016/j.ipm.2008.06.006},
  interhash = {5c265b0643553184944a27dcf769458c},
  intrahash = {9800252c9abaa7d478abe9a67a1a6bf6},
  issn      = {0306-4573},
  journal   = {Information Processing \& Management},
  number    = 1,
  pages     = {84--97},
  title     = {On co-authorship for author disambiguation},
  url       = {http://www.sciencedirect.com/science/article/pii/S0306457308000721},
  volume    = 45,
  year      = 2009
}