@article{liu2012crowdsourcing,
  abstract   = {Some complex problems, such as image tagging and natural language processing, are very challenging for computers, where even state-of-the-art technology is yet able to provide satisfactory accuracy. Therefore, rather than relying solely on developing new and better algorithms to handle such tasks, we look to the crowdsourcing solution -- employing human participation -- to make good the shortfall in current technology. Crowdsourcing is a good supplement to many computer tasks. A complex job may be divided into computer-oriented tasks and human-oriented tasks, which are then assigned to machines and humans respectively.

To leverage the power of crowdsourcing, we design and implement a Crowdsourcing Data Analytics System, CDAS. CDAS is a framework designed to support the deployment of various crowdsourcing applications. The core part of CDAS is a quality-sensitive answering model, which guides the crowdsourcing engine to process and monitor the human tasks. In this paper, we introduce the principles of our quality-sensitive model. To satisfy user required accuracy, the model guides the crowdsourcing query engine for the design and processing of the corresponding crowdsourcing jobs. It provides an estimated accuracy for each generated result based on the human workers' historical performances. When verifying the quality of the result, the model employs an online strategy to reduce waiting time. To show the effectiveness of the model, we implement and deploy two analytics jobs on CDAS, a twitter sentiment analytics job and an image tagging job. We use real Twitter and Flickr data as our queries respectively. We compare our approaches with state-of-the-art classification and image annotation techniques. The results show that the human-assisted methods can indeed achieve a much higher accuracy. By embedding the quality-sensitive model into crowdsourcing query engine, we effectively reduce the processing cost while maintaining the required query answer quality.},
  acmid      = {2336676},
  author     = {Liu, Xuan and Lu, Meiyu and Ooi, Beng Chin and Shen, Yanyan and Wu, Sai and Zhang, Meihui},
  interhash  = {41ad6e73b03373d76d3164ba248335d7},
  intrahash  = {2091967734f96c4afbc09319d48a8c65},
  issn       = {2150-8097},
  issue_date = {June 2012},
  journal    = {Proceedings of the VLDB Endowment},
  month      = jun,
  number     = {10},
  numpages   = {12},
  pages      = {1040--1051},
  publisher  = {VLDB Endowment},
  title      = {{CDAS}: A Crowdsourcing Data Analytics System},
  url        = {http://dl.acm.org/citation.cfm?id=2336664.2336676},
  volume     = {5},
  year       = {2012},
}

@article{raykar2010learning,
  abstract   = {For many supervised learning tasks it may be infeasible (or very expensive) to obtain objective and reliable labels. Instead, we can collect subjective (possibly noisy) labels from multiple experts or annotators. In practice, there is a substantial amount of disagreement among the annotators, and hence it is of great practical interest to address conventional supervised learning problems in this scenario. In this paper we describe a probabilistic approach for supervised learning when we have multiple annotators providing (possibly noisy) labels but no absolute gold standard. The proposed algorithm evaluates the different experts and also gives an estimate of the actual hidden labels. Experimental results indicate that the proposed method is superior to the commonly used majority voting baseline.},
  acmid      = {1859894},
  author     = {Raykar, Vikas C. and Yu, Shipeng and Zhao, Linda H. and Valadez, Gerardo Hermosillo and Florin, Charles and Bogoni, Luca and Moy, Linda},
  interhash  = {8113daf47997fddf48e4c6c79f2eba56},
  intrahash  = {14220abe8babfab01c0cdd5ebd5e4b7c},
  issn       = {1532-4435},
  issue_date = {3/1/2010},
  journal    = {Journal of Machine Learning Research},
  month      = aug,
  numpages   = {26},
  pages      = {1297--1322},
  publisher  = {JMLR.org},
  title      = {Learning From Crowds},
  url        = {http://dl.acm.org/citation.cfm?id=1756006.1859894},
  volume     = {11},
  year       = {2010},
}

@article{lofi2012information,
  abstract    = {Recent years brought tremendous advancements in the area of automated information extraction. But still, problem scenarios remain where even state-of-the-art algorithms do not provide a satisfying solution. In these cases, another aspiring recent trend can be exploited to achieve the required extraction quality: explicit crowdsourcing of human intelligence tasks. In this paper, we discuss the synergies between information extraction and crowdsourcing. In particular, we methodically identify and classify the challenges and fallacies that arise when combining both approaches. Furthermore, we argue that for harnessing the full potential of either approach, true hybrid techniques must be considered. To demonstrate this point, we showcase such a hybrid technique, which tightly interweaves information extraction with crowdsourcing and machine learning to vastly surpass the abilities of either technique.},
  address     = {Berlin/Heidelberg},
  affiliation = {Institut f{\"u}r Informationssysteme, Technische Universit{\"a}t Braunschweig, Braunschweig, Germany},
  author      = {Lofi, Christoph and Selke, Joachim and Balke, Wolf-Tilo},
  doi         = {10.1007/s13222-012-0092-8},
  interhash   = {941feeaa7bb134e0a5f8b5c0225756b8},
  intrahash   = {37cc8f1d19105a073544d6594fbbc033},
  issn        = {1618-2162},
  journal     = {Datenbank-Spektrum},
  keywords    = {Computer Science},
  number      = {2},
  pages       = {109--120},
  publisher   = {Springer},
  title       = {Information Extraction Meets Crowdsourcing: A Promising Couple},
  volume      = {12},
  year        = {2012},
}

@inproceedings{marcus2011crowdsourced,
  abstract  = {Amazon's Mechanical Turk (``MTurk'') service allows users to post short tasks (``HITs'') that other users can receive a small amount of money for completing. Common tasks on the system include labelling a collection of images, combining two sets of images to identify people which appear in both, or extracting sentiment from a corpus of text snippets. Designing a workflow of various kinds of HITs for filtering, aggregating, sorting, and joining data sources together is common, and comes with a set of challenges in optimizing the cost per HIT, the overall time to task completion, and the accuracy of MTurk results. We propose Qurk, a novel query system for managing these workflows, allowing crowd-powered processing of relational databases. We describe a number of query execution and optimization challenges, and discuss some potential solutions.},
  author    = {Marcus, Adam and Wu, Eugene and Madden, Samuel and Miller, Robert C.},
  booktitle = {Proceedings of the 5th Biennial Conference on Innovative Data Systems Research},
  interhash = {b6b7d67c3c09259fb2d5df3f52e24c9d},
  intrahash = {29723ba38aa6039091769cd2f69a1514},
  month     = jan,
  note      = {Handle: 1721.1/62827},
  pages     = {211--214},
  publisher = {CIDR},
  title     = {Crowdsourced Databases: Query Processing with People},
  url       = {http://dspace.mit.edu/handle/1721.1/62827},
  year      = {2011},
}

@inproceedings{franklin2011crowddb,
  abstract  = {Some queries cannot be answered by machines only. Processing such queries requires human input for providing information that is missing from the database, for performing computationally difficult functions, and for matching, ranking, or aggregating results based on fuzzy criteria. CrowdDB uses human input via crowdsourcing to process queries that neither database systems nor search engines can adequately answer. It uses SQL both as a language for posing complex queries and as a way to model data. While CrowdDB leverages many aspects of traditional database systems, there are also important differences. Conceptually, a major change is that the traditional closed-world assumption for query processing does not hold for human input. From an implementation perspective, human-oriented query operators are needed to solicit, integrate and cleanse crowdsourced data. Furthermore, performance and cost depend on a number of new factors including worker affinity, training, fatigue, motivation and location. We describe the design of CrowdDB, report on an initial set of experiments using Amazon Mechanical Turk, and outline important avenues for future work in the development of crowdsourced query processing systems.},
  acmid     = {1989331},
  address   = {New York, NY, USA},
  author    = {Franklin, Michael J. and Kossmann, Donald and Kraska, Tim and Ramesh, Sukriti and Xin, Reynold},
  booktitle = {Proceedings of the 2011 International Conference on Management of Data},
  doi       = {10.1145/1989323.1989331},
  interhash = {8a3f1b0fb94083c918960f1e756fe496},
  intrahash = {9525ebea13b41f27a49bafcf2f1132c6},
  isbn      = {978-1-4503-0661-4},
  location  = {Athens, Greece},
  numpages  = {12},
  pages     = {61--72},
  publisher = {ACM},
  title     = {{CrowdDB}: Answering Queries with Crowdsourcing},
  year      = {2011},
}

@article{selke2012pushing,
  abstract   = {By incorporating human workers into the query execution process crowd-enabled databases facilitate intelligent, social capabilities like completing missing data at query time or performing cognitive operators. But despite all their flexibility, crowd-enabled databases still maintain rigid schemas. In this paper, we extend crowd-enabled databases by flexible query-driven schema expansion, allowing the addition of new attributes to the database at query time. However, the number of crowd-sourced mini-tasks to fill in missing values may often be prohibitively large and the resulting data quality is doubtful. Instead of simple crowd-sourcing to obtain all values individually, we leverage the user-generated data found in the Social Web: By exploiting user ratings we build perceptual spaces, i.e., highly-compressed representations of opinions, impressions, and perceptions of large numbers of users. Using few training samples obtained by expert crowd sourcing, we then can extract all missing data automatically from the perceptual space with high quality and at low costs. Extensive experiments show that our approach can boost both performance and quality of crowd-enabled databases, while also providing the flexibility to expand schemas in a query-driven fashion.},
  acmid      = {2168655},
  author     = {Selke, Joachim and Lofi, Christoph and Balke, Wolf-Tilo},
  interhash  = {8d2c0e1e49d00f11fa124deeea4a7dbe},
  intrahash  = {41224a60badfeefb0fe2cea85f2a4ff0},
  issn       = {2150-8097},
  issue_date = {February 2012},
  journal    = {Proceedings of the VLDB Endowment},
  month      = feb,
  number     = {6},
  numpages   = {12},
  pages      = {538--549},
  publisher  = {VLDB Endowment},
  title      = {Pushing the Boundaries of Crowd-Enabled Databases with Query-Driven Schema Expansion},
  url        = {http://dl.acm.org/citation.cfm?id=2168651.2168655},
  volume     = {5},
  year       = {2012},
}

@techreport{parameswaran2011declarative,
  abstract    = {Crowdsourcing enables programmers to incorporate ``human computation'' as a building block in algorithms that cannot be fully automated, such as text analysis and image recognition. Similarly, humans can be used as a building block in data-intensive applications --- providing, comparing, and verifying data used by applications. Building upon the decades-long success of declarative approaches to conventional data management, we use a similar approach for data-intensive applications that incorporate humans. Specifically, declarative queries are posed over stored relational data as well as data computed on-demand from the crowd, and the underlying system orchestrates the computation of query answers. We present Deco, a database system for declarative crowdsourcing. We describe Deco's data model, query language, and our initial prototype. Deco's data model was designed to be general (it can be instantiated to other proposed models), flexible (it allows methods for uncertainty resolution and external access to be plugged in), and principled (it has a precisely-defined semantics). Syntactically, Deco's query language is a simple extension to SQL. Based on Deco's data model, we define a precise semantics for arbitrary queries involving both stored data and data obtained from the crowd. We then describe the Deco query processor, which respects our semantics while coping with the unique combination of latency, monetary cost, and uncertainty introduced in the crowdsourcing environment. Finally, we describe our current system implementation, and we discuss the novel query optimization challenges that form the core of our ongoing work.},
  author      = {Parameswaran, Aditya and Park, Hyunjung and Garcia-Molina, Hector and Polyzotis, Neoklis and Widom, Jennifer},
  institution = {Stanford University},
  interhash   = {af28066d0b21d87a9ef90f63d7e6095f},
  intrahash   = {4de5dd97e5466c9f1fc63c0d23b4d90a},
  number      = {1015},
  publisher   = {Stanford InfoLab},
  title       = {{Deco}: Declarative Crowdsourcing},
  url         = {http://ilpubs.stanford.edu:8090/1015/},
  year        = {2011},
}

@inproceedings{minder2011crowdlang,
  abstract  = {Crowdsourcing markets such as Amazon's Mechanical Turk provide an enormous potential for accomplishing work by combining human and machine computation. Today crowdsourcing is mostly used for massive parallel information processing for a variety of tasks such as image labeling. However, as we move to more sophisticated problem-solving there is little knowledge about managing dependencies between steps and a lack of tools for doing so. As the contribution of this paper, we present a concept of an executable, model-based programming language and a general purpose framework for accomplishing more sophisticated problems. Our approach is inspired by coordination theory and an analysis of emergent collective intelligence. We illustrate the applicability of our proposed language by combining machine and human computation based on existing interaction patterns for several general computation problems.},
  author    = {Minder, Patrick and Bernstein, Abraham},
  booktitle = {Proceedings of the 3rd Human Computation Workshop},
  interhash = {0f708aa0b0eb867beb89fe42a9e1a068},
  intrahash = {fe3477c51c6a2159ec1c72ecf299f1fb},
  pages     = {103--108},
  publisher = {AAAI Press},
  series    = {AAAI Workshops},
  title     = {{CrowdLang} -- First Steps Towards Programmable Human Computers for General Computation},
  url       = {https://www.aaai.org/ocs/index.php/WS/AAAIW11/paper/viewFile/3891/4251},
  year      = {2011},
}

@inproceedings{kittur2011crowdforge,
  abstract  = {Micro-task markets such as Amazon's Mechanical Turk represent a new paradigm for accomplishing work, in which employers can tap into a large population of workers around the globe to accomplish tasks in a fraction of the time and money of more traditional methods. However, such markets have been primarily used for simple, independent tasks, such as labeling an image or judging the relevance of a search result. Here we present a general purpose framework for accomplishing complex and interdependent tasks using micro-task markets. We describe our framework, a web-based prototype, and case studies on article writing, decision making, and science journalism that demonstrate the benefits and limitations of the approach.},
  acmid     = {2047202},
  address   = {New York, NY, USA},
  author    = {Kittur, Aniket and Smus, Boris and Khamkar, Susheel and Kraut, Robert E.},
  booktitle = {Proceedings of the 24th Annual ACM Symposium on User Interface Software and Technology},
  doi       = {10.1145/2047196.2047202},
  interhash = {96bc968750689063846b513c9dac7a57},
  intrahash = {e1022258d8e73b250ff625ce2e10095b},
  isbn      = {978-1-4503-0716-1},
  location  = {Santa Barbara, California, USA},
  numpages  = {10},
  pages     = {43--52},
  publisher = {ACM},
  title     = {{CrowdForge}: Crowdsourcing Complex Work},
  year      = {2011},
}

@article{doan2011crowdsourcing,
  abstract   = {The practice of crowdsourcing is transforming the Web and giving rise to a new field.},
  acmid      = {1924442},
  address    = {New York, NY, USA},
  author     = {Doan, Anhai and Ramakrishnan, Raghu and Halevy, Alon Y.},
  doi        = {10.1145/1924421.1924442},
  interhash  = {6dbf364159ce568b92727145a3fca85e},
  intrahash  = {84f738a6efae5eb6612ea75e8616fecf},
  issn       = {0001-0782},
  issue_date = {April 2011},
  journal    = {Communications of the ACM},
  month      = apr,
  number     = {4},
  numpages   = {11},
  pages      = {86--96},
  publisher  = {ACM},
  title      = {Crowdsourcing Systems on the {World-Wide Web}},
  volume     = {54},
  year       = {2011},
}

@inproceedings{quinn2011human,
  abstract  = {The rapid growth of human computation within research and industry has produced many novel ideas aimed at organizing web users to do great things. However, the growth is not adequately supported by a framework with which to understand each new system in the context of the old. We classify human computation systems to help identify parallels between different systems and reveal "holes" in the existing work as opportunities for new research. Since human computation is often confused with "crowdsourcing" and other terms, we explore the position of human computation with respect to these related topics.},
  acmid     = {1979148},
  address   = {New York, NY, USA},
  author    = {Quinn, Alexander J. and Bederson, Benjamin B.},
  booktitle = {Proceedings of the 2011 Annual Conference on Human Factors in Computing Systems},
  doi       = {10.1145/1978942.1979148},
  interhash = {f319e8c67a7af1afd804774ccba7b717},
  intrahash = {3524eeb1e7a62c5bfbe0cec74a14af21},
  isbn      = {978-1-4503-0228-9},
  location  = {Vancouver, BC, Canada},
  numpages  = {10},
  pages     = {1403--1412},
  publisher = {ACM},
  title     = {Human Computation: A Survey and Taxonomy of a Growing Field},
  year      = {2011},
}

@inproceedings{ls_leimeister,
  address   = {Bilbao, Spain},
  author    = {Blohm, I. and F{\"a}hling, J. and Leimeister, J. M. and Krcmar, H. and Fischer, J.},
  booktitle = {22. {ISPIM} Conference 2010},
  interhash = {9fa366d690b0820bac4b867269a14dba},
  intrahash = {e98feabe5b9628669d1f269885002d01},
  note      = {196 (44-10)},
  number    = {22},
  title     = {Accelerating Customer Integration into Innovation Processes Using {Pico-Jobs}},
  url       = {http://www.uni-kassel.de/fb7/ibwl/leimeister/pub/JML_196.pdf},
  year      = {2010},
}