% Bibliography: learning from noisy crowd labels and incorporating user
% feedback into information extraction / integration systems.
% Layout: one field per line; core bibliographic fields first, then
% identifiers, then import metadata, then the abstract.

% Raykar et al., "Learning From Crowds", JMLR 11 (2010).
% NOTE(review): month is set to apr to match the JMLR volume 11 listing for
% pages 1297--1322; the ACM-derived issue_date field is kept verbatim.
@article{raykar2010learning,
  author     = {Raykar, Vikas C. and Yu, Shipeng and Zhao, Linda H. and Valadez, Gerardo Hermosillo and Florin, Charles and Bogoni, Luca and Moy, Linda},
  title      = {Learning From Crowds},
  journal    = {Journal of Machine Learning Research},
  volume     = {11},
  pages      = {1297--1322},
  month      = apr,
  year       = {2010},
  publisher  = {JMLR.org},
  issn       = {1532-4435},
  url        = {http://dl.acm.org/citation.cfm?id=1756006.1859894},
  acmid      = {1859894},
  issue_date = {3/1/2010},
  numpages   = {26},
  interhash  = {8113daf47997fddf48e4c6c79f2eba56},
  intrahash  = {14220abe8babfab01c0cdd5ebd5e4b7c},
  abstract   = {For many supervised learning tasks it may be infeasible (or very expensive) to obtain objective and reliable labels. Instead, we can collect subjective (possibly noisy) labels from multiple experts or annotators. In practice, there is a substantial amount of disagreement among the annotators, and hence it is of great practical interest to address conventional supervised learning problems in this scenario. In this paper we describe a probabilistic approach for supervised learning when we have multiple annotators providing (possibly noisy) labels but no absolute gold standard. The proposed algorithm evaluates the different experts and also gives an estimate of the actual hidden labels. Experimental results indicate that the proposed method is superior to the commonly used majority voting baseline.},
}

% Li et al., chapter in "Web-Age Information Management", LNCS 6897 (2011).
@incollection{li2011incorporating,
  author      = {Li, Yuhua and Wen, Aiming and Lin, Quan and Li, Ruixuan and Lu, Zhengding},
  title       = {Incorporating User Feedback into Name Disambiguation of Scientific Cooperation Network},
  booktitle   = {Web-Age Information Management},
  editor      = {Wang, Haixun and Li, Shijun and Oyama, Satoshi and Hu, Xiaohua and Qian, Tieyun},
  series      = {Lecture Notes in Computer Science},
  volume      = {6897},
  pages       = {454--466},
  year        = {2011},
  publisher   = {Springer},
  address     = {Berlin/Heidelberg},
  isbn        = {978-3-642-23534-4},
  doi         = {10.1007/978-3-642-23535-1_39},
  url         = {http://dx.doi.org/10.1007/978-3-642-23535-1_39},
  keywords    = {Computer Science},
  affiliation = {Intelligent and Distributed Computing Lab, School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, 430074 P.R. China},
  interhash   = {3baace12cb4481dcceb53c2d47f413b5},
  intrahash   = {96f2ae8551126527c2dfe69c8fa22f6c},
  abstract    = {In scientific cooperation network, ambiguous author names may occur due to the existence of multiple authors with the same name. Users of these networks usually want to know the exact author of a paper, whereas we do not have any unique identifier to distinguish them. In this paper, we focus ourselves on such problem, we propose a new method that incorporates user feedback into the model for name disambiguation of scientific cooperation network. Perceptron is used as the classifier.
Two features and a constraint drawn from user feedback are incorporated into the perceptron to enhance the performance of name disambiguation. Specifically, we construct user feedback as a training stream, and refine the perceptron continuously. Experimental results show that the proposed algorithm can learn continuously and significantly outperforms the previous methods without introducing user interactions.},
}

% Lofi, Selke, Balke, article in Datenbank-Spektrum 12(2), 2012.
% The affiliation uses LaTeX accent escapes so the file stays 8-bit clean.
@article{lofi2012information,
  author      = {Lofi, Christoph and Selke, Joachim and Balke, Wolf-Tilo},
  title       = {Information Extraction Meets Crowdsourcing: A Promising Couple},
  journal     = {Datenbank-Spektrum},
  volume      = {12},
  number      = {2},
  pages       = {109--120},
  year        = {2012},
  publisher   = {Springer},
  address     = {Berlin/Heidelberg},
  issn        = {1618-2162},
  doi         = {10.1007/s13222-012-0092-8},
  url         = {http://dx.doi.org/10.1007/s13222-012-0092-8},
  keywords    = {Computer Science},
  affiliation = {Institut f{\"u}r Informationssysteme, Technische Universit{\"a}t Braunschweig, Braunschweig, Germany},
  interhash   = {941feeaa7bb134e0a5f8b5c0225756b8},
  intrahash   = {37cc8f1d19105a073544d6594fbbc033},
  abstract    = {Recent years brought tremendous advancements in the area of automated information extraction. But still, problem scenarios remain where even state-of-the-art algorithms do not provide a satisfying solution. In these cases, another aspiring recent trend can be exploited to achieve the required extraction quality: explicit crowdsourcing of human intelligence tasks. In this paper, we discuss the synergies between information extraction and crowdsourcing. In particular, we methodically identify and classify the challenges and fallacies that arise when combining both approaches.
Furthermore, we argue that for harnessing the full potential of either approach, true hybrid techniques must be considered. To demonstrate this point, we showcase such a hybrid technique, which tightly interweaves information extraction with crowdsourcing and machine learning to vastly surpass the abilities of either technique.},
}

% Paton et al., CIDR 2011 conference paper.
@inproceedings{paton2011feedback,
  author    = {Paton, Norman W. and Fernandes, Alvaro A. A. and Hedeler, Cornelia and Embury, Suzanne M.},
  title     = {User Feedback as a First Class Citizen in Information Integration Systems},
  booktitle = {Proceedings of the Conference on Innovative Data Systems Research ({CIDR})},
  pages     = {175--183},
  year      = {2011},
  url       = {http://www.cidrdb.org/cidr2011/Papers/CIDR11_Paper21.pdf},
  interhash = {1874e5c09919244808457021d2d884d1},
  intrahash = {cd75210156615616e4f25c91143040c4},
  abstract  = {User feedback is gaining momentum as a means of addressing the difficulties underlying information integration tasks. It can be used to assist users in building information integration systems and to improve the quality of existing systems, e.g., in dataspaces. Existing proposals in the area are confined to specific integration sub-problems considering a specific kind of feedback sought, in most cases, from a single user. We argue in this paper that, in order to maximize the benefits that can be drawn from user feedback, it should be considered and managed as a first class citizen. Accordingly, we present generic operations that underpin the management of feedback within information integration systems, and that are applicable to feedback of different kinds, potentially supplied by multiple users with different expectations.
We present preliminary solutions that can be adopted for realizing such operations, and sketch a research agenda for the information integration community.},
}

% Chai et al., SIGMOD 2009 conference paper.
% address holds the publisher's city; location holds the conference venue.
@inproceedings{chai2009efficiently,
  author    = {Chai, Xiaoyong and Vuong, Ba-Quy and Doan, AnHai and Naughton, Jeffrey F.},
  title     = {Efficiently Incorporating User Feedback into Information Extraction and Integration Programs},
  booktitle = {Proceedings of the 35th {SIGMOD} International Conference on Management of Data},
  pages     = {87--100},
  year      = {2009},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Providence, Rhode Island, USA},
  isbn      = {978-1-60558-551-2},
  doi       = {10.1145/1559845.1559857},
  url       = {http://doi.acm.org/10.1145/1559845.1559857},
  acmid     = {1559857},
  numpages  = {14},
  interhash = {5860215447e374b059597c0e3864e388},
  intrahash = {d6c9fbf442a935dc0618107f8fb54d44},
  abstract  = {Many applications increasingly employ information extraction and integration (IE/II) programs to infer structures from unstructured data. Automatic IE/II are inherently imprecise. Hence such programs often make many IE/II mistakes, and thus can significantly benefit from user feedback. Today, however, there is no good way to automatically provide and process such feedback. When finding an IE/II mistake, users often must alert the developer team (e.g., via email or Web form) about the mistake, and then wait for the team to manually examine the program internals to locate and fix the mistake, a slow, error-prone, and frustrating process.

In this paper we propose a solution for users to directly provide feedback and for IE/II programs to automatically process such feedback. In our solution a developer U uses hlog, a declarative IE/II language, to write an IE/II program P. Next, U writes declarative user feedback rules that specify which parts of P's data (e.g., input, intermediate, or output data) users can edit, and via which user interfaces. Next, the so-augmented program P is executed, then enters a loop of waiting for and incorporating user feedback. Given user feedback F on a data portion of P, we show how to automatically propagate F to the rest of P, and to seamlessly combine F with prior user feedback. We describe the syntax and semantics of hlog, a baseline execution strategy, and then various optimization techniques. Finally, we describe experiments with real-world data that demonstrate the promise of our solution.},
}