% Bibliography entries on information extraction and crowd-enabled databases.
% Layout: one field per line; core bibliographic fields first, then identifiers,
% then export metadata (keyword, affiliation, interhash/intrahash), abstract last.
% NOTE(review): interhash/intrahash look like identifiers written by the exporting
% reference manager -- kept verbatim; confirm before removing.

% Overview article: Datenbank-Spektrum, volume 12, number 2.
@article{balke2012introduction,
  author      = {Balke, Wolf-Tilo},
  title       = {Introduction to Information Extraction: Basic Notions and Current Trends},
  journal     = {Datenbank-Spektrum},
  volume      = 12,
  number      = 2,
  pages       = {81--88},
  year        = 2012,
  publisher   = {Springer},
  address     = {Berlin/Heidelberg},
  issn        = {1618-2162},
  doi         = {10.1007/s13222-012-0090-x},
  url         = {https://doi.org/10.1007/s13222-012-0090-x},
  keyword     = {Computer Science},
  affiliation = {Institut für Informationssysteme, Technische Universität Braunschweig, Braunschweig, Germany},
  interhash   = {0127ba6c59c3f7f7121429eb098a4b90},
  intrahash   = {992b3c989c8fda7c58cd9262e2f70907},
  abstract    = {Transforming unstructured or semi-structured information into structured knowledge is one of the big challenges of today’s knowledge society. While this abstract goal is still unreached and probably unreachable, intelligent information extraction techniques are considered key ingredients on the way to generating and representing knowledge for a wide variety of applications. This is especially true for the current efforts to turn the World Wide Web being the world’s largest collection of information into the world’s largest knowledge base. This introduction gives a broad overview about the major topics and current trends in information extraction.},
}

% Same journal issue as the entry above (Datenbank-Spektrum, volume 12, number 2).
@article{lofi2012information,
  author      = {Lofi, Christoph and Selke, Joachim and Balke, Wolf-Tilo},
  title       = {Information Extraction Meets Crowdsourcing: A Promising Couple},
  journal     = {Datenbank-Spektrum},
  volume      = 12,
  number      = 2,
  pages       = {109--120},
  year        = 2012,
  publisher   = {Springer},
  address     = {Berlin/Heidelberg},
  issn        = {1618-2162},
  doi         = {10.1007/s13222-012-0092-8},
  url         = {https://doi.org/10.1007/s13222-012-0092-8},
  keyword     = {Computer Science},
  affiliation = {Institut für Informationssysteme, Technische Universität Braunschweig, Braunschweig, Germany},
  interhash   = {941feeaa7bb134e0a5f8b5c0225756b8},
  intrahash   = {37cc8f1d19105a073544d6594fbbc033},
  abstract    = {Recent years brought tremendous advancements in the area of automated information extraction. But still, problem scenarios remain where even state-of-the-art algorithms do not provide a satisfying solution. In these cases, another aspiring recent trend can be exploited to achieve the required extraction quality: explicit crowdsourcing of human intelligence tasks. In this paper, we discuss the synergies between information extraction and crowdsourcing. In particular, we methodically identify and classify the challenges and fallacies that arise when combining both approaches. Furthermore, we argue that for harnessing the full potential of either approach, true hybrid techniques must be considered. To demonstrate this point, we showcase such a hybrid technique, which tightly interweaves information extraction with crowdsourcing and machine learning to vastly surpass the abilities of either technique.},
}

% Proceedings of the VLDB Endowment, volume 5, number 6.
% NOTE(review): the doi below was added during cleanup (PVLDB prefix plus the ACM
% id already present in the url field) -- confirm it resolves to this paper.
@article{selke2012pushing,
  author     = {Selke, Joachim and Lofi, Christoph and Balke, Wolf-Tilo},
  title      = {Pushing the Boundaries of Crowd-enabled Databases with Query-driven Schema Expansion},
  journal    = {Proceedings of the VLDB Endowment},
  volume     = 5,
  number     = 6,
  pages      = {538--549},
  numpages   = {12},
  month      = feb,
  year       = 2012,
  issue_date = {February 2012},
  publisher  = {VLDB Endowment},
  issn       = {2150-8097},
  doi        = {10.14778/2168651.2168655},
  url        = {http://dl.acm.org/citation.cfm?id=2168651.2168655},
  acmid      = {2168655},
  interhash  = {8d2c0e1e49d00f11fa124deeea4a7dbe},
  intrahash  = {41224a60badfeefb0fe2cea85f2a4ff0},
  abstract   = {By incorporating human workers into the query execution process crowd-enabled databases facilitate intelligent, social capabilities like completing missing data at query time or performing cognitive operators. But despite all their flexibility, crowd-enabled databases still maintain rigid schemas. In this paper, we extend crowd-enabled databases by flexible query-driven schema expansion, allowing the addition of new attributes to the database at query time. However, the number of crowd-sourced mini-tasks to fill in missing values may often be prohibitively large and the resulting data quality is doubtful. Instead of simple crowd-sourcing to obtain all values individually, we leverage the user-generated data found in the Social Web: By exploiting user ratings we build perceptual spaces, i.e., highly-compressed representations of opinions, impressions, and perceptions of large numbers of users. Using few training samples obtained by expert crowd sourcing, we then can extract all missing data automatically from the perceptual space with high quality and at low costs. Extensive experiments show that our approach can boost both performance and quality of crowd-enabled databases, while also providing the flexibility to expand schemas in a query-driven fashion.},
}