% Conference paper: decision-theoretic (VPI-based) ordering of user feedback
% for confirming candidate matches in dataspace systems.
% NOTE(review): `address` holds the publisher's city while `location` appears
% to hold the conference venue (ACM export convention). biblatex treats
% `address` as an alias of `location`, so confirm which one the target style
% should see before switching toolchains.
@inproceedings{jeffery2008payasyougo,
  abstract  = {A primary challenge to large-scale data integration is creating semantic equivalences between elements from different data sources that correspond to the same real-world entity or concept. Dataspaces propose a pay-as-you-go approach: automated mechanisms such as schema matching and reference reconciliation provide initial correspondences, termed candidate matches, and then user feedback is used to incrementally confirm these matches. The key to this approach is to determine in what order to solicit user feedback for confirming candidate matches.

In this paper, we develop a decision-theoretic framework for ordering candidate matches for user confirmation using the concept of the value of perfect information (VPI). At the core of this concept is a utility function that quantifies the desirability of a given state; thus, we devise a utility function for dataspaces based on query result quality. We show in practice how to efficiently apply VPI in concert with this utility function to order user confirmations. A detailed experimental evaluation on both real and synthetic datasets shows that the ordering of user feedback produced by this VPI-based approach yields a dataspace with a significantly higher utility than a wide range of other ordering strategies. Finally, we outline the design of Roomba, a system that utilizes this decision-theoretic framework to guide a dataspace in soliciting user feedback in a pay-as-you-go manner.},
  acmid     = {1376701},
  address   = {New York, NY, USA},
  author    = {Jeffery, Shawn R. and Franklin, Michael J. and Halevy, Alon Y.},
  booktitle = {Proceedings of the 2008 {ACM} {SIGMOD} International Conference on Management of Data},
  doi       = {10.1145/1376616.1376701},
  interhash = {3ceaf563712b776c1ed97a8cb061f63b},
  intrahash = {3bff24fb9eb1e39fa97a524aabb8dee9},
  isbn      = {978-1-60558-102-6},
  location  = {Vancouver, Canada},
  numpages  = {14},
  pages     = {847--860},
  publisher = {ACM},
  title     = {Pay-as-you-go user feedback for dataspace systems},
  url       = {http://doi.acm.org/10.1145/1376616.1376701},
  year      = 2008
}

% Journal article: how ontology evolution differs from database-schema
% evolution and what that implies for ontology-evolution frameworks.
@article{noy2004ontology,
  abstract    = {As ontology development becomes a more ubiquitous and collaborative process, ontology versioning and evolution becomes an important area of ontology research. The many similarities between database-schema evolution and ontology evolution will allow us to build on the extensive research in schema evolution. However, there are also important differences between database schemas and ontologies.
The differences stem from different usage paradigms, the presence of explicit semantics and different knowledge models. A lot of problems that existed only in theory in database research come to the forefront as practical problems in ontology evolution. These differences have important implications for the development of ontology-evolution frameworks: The traditional distinction between versioning and evolution is not applicable to ontologies. There are several dimensions along which compatibility between versions must be considered. The set of change operations for ontologies is different. We must develop automatic techniques for finding similarities and differences between versions.},
  address     = {London},
  affiliation = {Stanford Medical Informatics Stanford University Stanford CA 94305 USA},
  author      = {Noy, Natalya F. and Klein, Michel},
  doi         = {10.1007/s10115-003-0137-2},
  interhash   = {4b4ee2090ba5356a3d0e853192968662},
  intrahash   = {08ee0381e240c3ee414e0eefc7fe1a83},
  issn        = {0219-1377},
  journal     = {Knowledge and Information Systems},
  keywords    = {Computer Science},
  number      = 4,
  pages       = {428--440},
  publisher   = {Springer},
  title       = {Ontology Evolution: Not the Same as Schema Evolution},
  url         = {http://dx.doi.org/10.1007/s10115-003-0137-2},
  volume      = 6,
  year        = 2004
}

% Journal article: query-driven schema expansion for crowd-enabled databases
% using perceptual spaces built from user ratings.
@article{selke2012pushing,
  abstract   = {By incorporating human workers into the query execution process crowd-enabled databases facilitate intelligent, social capabilities like completing missing data at query time or performing cognitive operators. But despite all their flexibility, crowd-enabled databases still maintain rigid schemas. In this paper, we extend crowd-enabled databases by flexible query-driven schema expansion, allowing the addition of new attributes to the database at query time. However, the number of crowd-sourced mini-tasks to fill in missing values may often be prohibitively large and the resulting data quality is doubtful.
Instead of simple crowd-sourcing to obtain all values individually, we leverage the user-generated data found in the Social Web: By exploiting user ratings we build perceptual spaces, i.e., highly-compressed representations of opinions, impressions, and perceptions of large numbers of users. Using few training samples obtained by expert crowd sourcing, we then can extract all missing data automatically from the perceptual space with high quality and at low costs. Extensive experiments show that our approach can boost both performance and quality of crowd-enabled databases, while also providing the flexibility to expand schemas in a query-driven fashion.},
  acmid      = {2168655},
  author     = {Selke, Joachim and Lofi, Christoph and Balke, Wolf-Tilo},
  doi        = {10.14778/2168651.2168655},
  interhash  = {8d2c0e1e49d00f11fa124deeea4a7dbe},
  intrahash  = {41224a60badfeefb0fe2cea85f2a4ff0},
  issn       = {2150-8097},
  issue_date = {February 2012},
  journal    = {Proceedings of the {VLDB} Endowment},
  month      = feb,
  number     = 6,
  numpages   = {12},
  pages      = {538--549},
  publisher  = {VLDB Endowment},
  title      = {Pushing the boundaries of crowd-enabled databases with query-driven schema expansion},
  url        = {http://dl.acm.org/citation.cfm?id=2168651.2168655},
  volume     = 5,
  year       = 2012
}