% Bibliography on author identification, name disambiguation, tagging and
% author-topic models. Text outside of entries is ignored by BibTeX, so these
% percent-prefixed lines act as comments.
%
% Conventions used below:
%   - one field per line, in the order: author, title, venue, numbering,
%     date, publisher data, identifiers, abstract, bookkeeping fields;
%   - page ranges use a double hyphen;
%   - words in titles that must keep their capitals are brace-protected;
%   - interhash / intrahash / acmid / citeulike-* / owner / priority /
%     posted-at are bookkeeping fields from the exporting tools; standard
%     styles ignore unknown fields.

% Page range written with a double hyphen and without the stray trailing
% period. The nouns and the acronym in the German title are brace-protected
% so that sentence-casing styles do not lowercase them.
@article{pai2010,
  author    = {Steenweg, Helge},
  title     = {Eindeutige {Autoren-Identifikation} – ({PAI} – {Persistent} {Author} {Identification}) – {Versuch} einer {Annäherung}},
  journal   = {ABI-Technik},
  volume    = {30},
  number    = {4},
  pages     = {240--251},
  year      = {2010},
  abstract  = {Im vorliegenden Beitrag wird die Notwendigkeit einer eindeutigen Autorenidentifikation (Persistant Author Identification - PAI) diskutiert und die bisher vorhandenen fachlichen, nationalen, überregionalen und kommerziellen Ansätze vorgestellt und erläutert. Es zeigt sich, dass die Entwicklung auf dem Sektor der Personennormdateien (authorithy records) mittels VIAF und ISNI auf einem guten Wege ist. Bei der notwendigen Zusammenführung der bisherigen Autoren-Identifikatoren in einem übergeordneten System könnte der neuen ORCID-Initiative große Bedeutung zukommen.},
  interhash = {ada56d12f3dacd95666004f9ce312e0a},
  intrahash = {e5edd10266c5e377122b0c2be4432ca0},
}

% The acronym in the title is brace-protected against style recasing.
@article{ley2009lessons,
  author     = {Ley, Michael},
  title      = {{DBLP}: some lessons learned},
  journal    = {Proceedings of the VLDB Endowment},
  volume     = {2},
  number     = {2},
  pages      = {1493--1500},
  month      = aug,
  year       = {2009},
  publisher  = {VLDB Endowment},
  issn       = {2150-8097},
  url        = {http://dl.acm.org/citation.cfm?id=1687553.1687577},
  acmid      = {1687577},
  issue_date = {August 2009},
  numpages   = {8},
  abstract   = {The DBLP Computer Science Bibliography evolved from an early small experimental Web server to a popular service for the computer science community. Many design decisions and details of the public XML-records behind DBLP never were documented. This paper is a review of the evolution of DBLP. The main perspective is data modeling. In DBLP persons play a central role, our discussion of person names may be applicable to many other data bases. All DBLP data are available for your own experiments. You may either download the complete set, or use a simple XML-based API described in an online appendix.},
  interhash  = {a75ae2987d55512b7d0731c7a11a1722},
  intrahash  = {bb968ff4ba9ae93bc80ba05d16a98ff4},
}

@inproceedings{heck2011testing,
  author    = {Heck, Tamara and Peters, Isabella and Stock, Wolfgang G.},
  title     = {Testing collaborative filtering against co-citation analysis and bibliographic coupling for academic author recommendation},
  booktitle = {Workshop on Recommender Systems and the Social Web (ACM RecSys'11)},
  year      = {2011},
  interhash = {d250a0eb45ca7c198d9cdb238802fd74},
  intrahash = {8b68db4ae61ec5c97010fbec2ddaa6c6},
}

% This record has no page range; it is identified by its article number
% (articleno) within the issue instead.
@article{bhattacharya2007collective,
  author     = {Bhattacharya, Indrajit and Getoor, Lise},
  title      = {Collective entity resolution in relational data},
  journal    = {ACM Transactions on Knowledge Discovery from Data},
  volume     = {1},
  number     = {1},
  month      = mar,
  year       = {2007},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {1556-4681},
  doi        = {10.1145/1217299.1217304},
  url        = {http://doi.acm.org/10.1145/1217299.1217304},
  acmid      = {1217304},
  articleno  = {5},
  issue      = {1},
  issue_date = {March 2007},
  abstract   = {Many databases contain uncertain and imprecise references to real-world entities. The absence of identifiers for the underlying entities often results in a database which contains multiple references to the same entity. This can lead not only to data redundancy, but also inaccuracies in query processing and knowledge extraction. These problems can be alleviated through the use of entity resolution. Entity resolution involves discovering the underlying entities and mapping each database reference to these entities. Traditionally, entities are resolved using pairwise similarity over the attributes of references. However, there is often additional relational information in the data. Specifically, references to different entities may cooccur. In these cases, collective entity resolution, in which entities for cooccurring references are determined jointly rather than independently, can improve entity resolution accuracy. We propose a novel relational clustering algorithm that uses both attribute and relational information for determining the underlying domain entities, and we give an efficient implementation. We investigate the impact that different relational similarity measures have on entity resolution quality. We evaluate our collective entity resolution algorithm on multiple real-world databases. We show that it improves entity resolution performance over both attribute-based baselines and over algorithms that consider relational information but do not resolve entities collectively. In addition, we perform detailed experiments on synthetically generated data to identify data characteristics that favor collective relational resolution over purely attribute-based algorithms.},
  interhash  = {3fdd3dfe026b0f18c7b9927ebe471cf1},
  intrahash  = {5c65a3d97ac6933ca2f63480630d99cf},
}

% The ampersand in the journal name is escaped; a bare ampersand is an
% alignment character in LaTeX and aborts typesetting of the bibliography.
@article{kang2009coauthorship,
  author    = {Kang, In-Su and Na, Seung-Hoon and Lee, Seungwoo and Jung, Hanmin and Kim, Pyung and Sung, Won-Kyung and Lee, Jong-Hyeok},
  title     = {On co-authorship for author disambiguation},
  journal   = {Information Processing \& Management},
  volume    = {45},
  number    = {1},
  pages     = {84--97},
  year      = {2009},
  issn      = {0306-4573},
  doi       = {10.1016/j.ipm.2008.06.006},
  url       = {http://www.sciencedirect.com/science/article/pii/S0306457308000721},
  abstract  = {Author name disambiguation deals with clustering the same-name authors into different individuals. To attack the problem, many studies have employed a variety of disambiguation features such as coauthors, titles of papers/publications, topics of articles, emails/affiliations, etc. Among these, co-authorship is the most easily accessible and influential, since inter-person acquaintances represented by co-authorship could discriminate the identities of authors more clearly than other features. This study attempts to explore the net effects of co-authorship on author clustering in bibliographic data. First, to handle the shortage of explicit coauthors listed in known citations, a web-assisted technique of acquiring implicit coauthors of the target author to be disambiguated is proposed. Then, a coauthor disambiguation hypothesis that the identity of an author can be determined by his/her coauthors is examined and confirmed through a variety of author disambiguation experiments.},
  interhash = {5c265b0643553184944a27dcf769458c},
  intrahash = {9800252c9abaa7d478abe9a67a1a6bf6},
}

@inproceedings{kipp2006exploring,
  author                = {Kipp, Margaret E. I.},
  title                 = {Exploring the context of user, creator and intermediate tagging},
  booktitle             = {ASIS\&T 2006 Information Architecture Summit},
  month                 = mar,
  year                  = {2006},
  url                   = {http://iasummit.org/2006/conferencedescrip.htm\#109},
  citeulike-article-id  = {581353},
  citeulike-linkout-0   = {http://iasummit.org/2006/conferencedescrip.htm\#109},
  posted-at             = {2006-04-11 03:32:13},
  priority              = {3},
  abstract              = {This paper examines the results of a study of the three groups involved in creating index keywords or tags: users, authors and intermediaries. Keywords from each of the three groups were compared to determine similarities and differences in term use. Comparisons suggested that there were important differences in the contexts of the three groups that should be taken into account when assigning keywords or designing systems for the organisation of information.},
  interhash             = {cc95302ec99e70ffae810ee377ae98e6},
  intrahash             = {904d826cdf2349f8b6ec802eddd6d0c4},
}

% A second, field-for-field identical copy of the pai2010 record used to sit
% at this position. Citation keys must be unique (BibTeX reports a repeated
% entry otherwise), so only the first copy at the top of the file is kept.

@inproceedings{steyvers_04,
  author    = {Steyvers, M. and Smyth, P. and Rosen-Zvi, M. and Griffiths, T.},
  title     = {Probabilistic Author-Topic models for information discovery},
  booktitle = {The Tenth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  year      = {2004},
  owner     = {heinrich},
  interhash = {b80d5948a7089aa63ce0f7d349c5ab85},
  intrahash = {3e7e6fc1cec1569459165827bf7c9ded},
}

% Typed as a conference paper: the record carries a booktitle and no journal,
% so the article type would be missing its required journal field. The former
% single organization value is split into publisher and address. The title is
% no longer wrapped in a second pair of braces, which would block any style
% casing; its sentence-case spelling is unchanged.
@inproceedings{rosen2004author,
  author    = {Rosen-Zvi, M. and Griffiths, T. and Steyvers, M. and Smyth, P.},
  title     = {The author-topic model for authors and documents},
  booktitle = {Proceedings of the 20th conference on Uncertainty in artificial intelligence},
  pages     = {487--494},
  year      = {2004},
  publisher = {AUAI Press},
  address   = {Arlington, Virginia, United States},
  url       = {http://scholar.google.de/scholar.bib?q=info:k09ZBERi_eEJ:scholar.google.com/&output=citation&hl=de&ct=citation&cd=0},
  interhash = {79b4ff1335f13cdbe18a38086e9fab3b},
  intrahash = {6201d5a343724dad09e76ac7ad4fc0a7},
}

% The pages field holds a single number rather than a range; it matches the
% trailing component of the DOI.
@article{newman2002assortative,
  author    = {Newman, M. E. J.},
  title     = {Assortative Mixing in Networks},
  journal   = {Physical Review Letters},
  volume    = {89},
  number    = {20},
  pages     = {208701},
  month     = oct,
  year      = {2002},
  publisher = {American Physical Society},
  doi       = {10.1103/PhysRevLett.89.208701},
  url       = {http://link.aps.org/doi/10.1103/PhysRevLett.89.208701},
  abstract  = {A network is said to show assortative mixing if the nodes in the network that have many connections tend to be connected to other nodes with many connections. Here we measure mixing patterns in a variety of networks and find that social networks are mostly assortatively mixed, but that technological and biological networks tend to be disassortative. We propose a model of an assortatively mixed network, which we study both analytically and numerically. Within this model we find that networks percolate more easily if they are assortative and that they are also more robust to vertex removal.},
  interhash = {7265c6dc287861591f52e46b17404a08},
  intrahash = {3ba2913f29e817d122b41e8d78aeeecf},
}