@article{alonso2008crowdsourcing,
  abstract   = {Relevance evaluation is an essential part of the development and maintenance of information retrieval systems. Yet traditional evaluation approaches have several limitations; in particular, conducting new editorial evaluations of a search system can be very expensive. We describe a new approach to evaluation called TERC, based on the crowdsourcing paradigm, in which many online users, drawn from a large community, each performs a small evaluation task.},
  author     = {Alonso, Omar and Rose, Daniel E. and Stewart, Benjamin},
  title      = {Crowdsourcing for Relevance Evaluation},
  journal    = {SIGIR Forum},
  volume     = {42},
  number     = {2},
  pages      = {9--15},
  numpages   = {7},
  month      = nov,
  year       = {2008},
  issue_date = {December 2008},
  issn       = {0163-5840},
  doi        = {10.1145/1480506.1480508},
  url        = {http://doi.acm.org/10.1145/1480506.1480508},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  acmid      = {1480508}
}

@article{raykar2010learning,
  abstract   = {For many supervised learning tasks it may be infeasible (or very expensive) to obtain objective and reliable labels. Instead, we can collect subjective (possibly noisy) labels from multiple experts or annotators. In practice, there is a substantial amount of disagreement among the annotators, and hence it is of great practical interest to address conventional supervised learning problems in this scenario. In this paper we describe a probabilistic approach for supervised learning when we have multiple annotators providing (possibly noisy) labels but no absolute gold standard. The proposed algorithm evaluates the different experts and also gives an estimate of the actual hidden labels. Experimental results indicate that the proposed method is superior to the commonly used majority voting baseline.},
  author     = {Raykar, Vikas C. and Yu, Shipeng and Zhao, Linda H. and Valadez, Gerardo Hermosillo and Florin, Charles and Bogoni, Luca and Moy, Linda},
  title      = {Learning From Crowds},
  journal    = {Journal of Machine Learning Research},
  volume     = {11},
  pages      = {1297--1322},
  numpages   = {26},
  month      = aug,
  year       = {2010},
  issue_date = {3/1/2010},
  issn       = {1532-4435},
  url        = {http://dl.acm.org/citation.cfm?id=1756006.1859894},
  publisher  = {JMLR.org},
  acmid      = {1859894}
}

@article{lofi2012information,
  abstract    = {Recent years brought tremendous advancements in the area of automated information extraction. But still, problem scenarios remain where even state-of-the-art algorithms do not provide a satisfying solution. In these cases, another aspiring recent trend can be exploited to achieve the required extraction quality: explicit crowdsourcing of human intelligence tasks. In this paper, we discuss the synergies between information extraction and crowdsourcing. In particular, we methodically identify and classify the challenges and fallacies that arise when combining both approaches. Furthermore, we argue that for harnessing the full potential of either approach, true hybrid techniques must be considered. To demonstrate this point, we showcase such a hybrid technique, which tightly interweaves information extraction with crowdsourcing and machine learning to vastly surpass the abilities of either technique.},
  author      = {Lofi, Christoph and Selke, Joachim and Balke, Wolf-Tilo},
  title       = {Information Extraction Meets Crowdsourcing: A Promising Couple},
  journal     = {Datenbank-Spektrum},
  volume      = {12},
  number      = {2},
  pages       = {109--120},
  year        = {2012},
  issn        = {1618-2162},
  doi         = {10.1007/s13222-012-0092-8},
  url         = {http://dx.doi.org/10.1007/s13222-012-0092-8},
  publisher   = {Springer},
  address     = {Berlin/Heidelberg},
  affiliation = {Institut für Informationssysteme, Technische Universität Braunschweig, Braunschweig, Germany},
  keywords    = {Computer Science}
}

@inproceedings{marcus2011crowdsourced,
  abstract  = {Amazon's Mechanical Turk (``MTurk'') service allows users to post short tasks (``HITs'') that other users can receive a small amount of money for completing. Common tasks on the system include labelling a collection of images, combining two sets of images to identify people who appear in both, or extracting sentiment from a corpus of text snippets. Designing a workflow of various kinds of HITs for filtering, aggregating, sorting, and joining data sources together is common, and comes with a set of challenges in optimizing the cost per HIT, the overall time to task completion, and the accuracy of MTurk results. We propose Qurk, a novel query system for managing these workflows, allowing crowd-powered processing of relational databases. We describe a number of query execution and optimization challenges, and discuss some potential solutions.},
  author    = {Marcus, Adam and Wu, Eugene and Madden, Samuel and Miller, Robert C.},
  title     = {Crowdsourced Databases: Query Processing with People},
  booktitle = {Proceedings of the 5th Biennial Conference on Innovative Data Systems Research},
  pages     = {211--214},
  month     = jan,
  year      = {2011},
  publisher = {CIDR},
  url       = {http://dspace.mit.edu/handle/1721.1/62827}
}

@inproceedings{franklin2011crowddb,
  abstract  = {Some queries cannot be answered by machines only. Processing such queries requires human input for providing information that is missing from the database, for performing computationally difficult functions, and for matching, ranking, or aggregating results based on fuzzy criteria. CrowdDB uses human input via crowdsourcing to process queries that neither database systems nor search engines can adequately answer. It uses SQL both as a language for posing complex queries and as a way to model data. While CrowdDB leverages many aspects of traditional database systems, there are also important differences. Conceptually, a major change is that the traditional closed-world assumption for query processing does not hold for human input. From an implementation perspective, human-oriented query operators are needed to solicit, integrate and cleanse crowdsourced data. Furthermore, performance and cost depend on a number of new factors including worker affinity, training, fatigue, motivation and location. We describe the design of CrowdDB, report on an initial set of experiments using Amazon Mechanical Turk, and outline important avenues for future work in the development of crowdsourced query processing systems.},
  author    = {Franklin, Michael J. and Kossmann, Donald and Kraska, Tim and Ramesh, Sukriti and Xin, Reynold},
  title     = {CrowdDB: Answering Queries with Crowdsourcing},
  booktitle = {Proceedings of the 2011 ACM SIGMOD International Conference on Management of Data},
  location  = {Athens, Greece},
  pages     = {61--72},
  numpages  = {12},
  year      = {2011},
  isbn      = {978-1-4503-0661-4},
  doi       = {10.1145/1989323.1989331},
  url       = {http://doi.acm.org/10.1145/1989323.1989331},
  publisher = {ACM},
  address   = {New York, NY, USA},
  acmid     = {1989331}
}