% Bibliography on query languages, query logs and query processing.
% Notes live outside the entries because BibTeX ignores any text that is
% not part of an entry. The interhash/intrahash fields are carried over
% unchanged from the exporting system; standard styles ignore them.

% W3C Recommendation for the SPARQL query language (January 2008).
% Acronyms in the title are brace-protected against sentence-casing styles.
@techreport{prudhommeaux2008sparql,
  abstract    = {RDF is a directed, labeled graph data format for representing information in the Web. This specification defines the syntax and semantics of the SPARQL query language for RDF. SPARQL can be used to express queries across diverse data sources, whether the data is stored natively as RDF or viewed as RDF via middleware. SPARQL contains capabilities for querying required and optional graph patterns along with their conjunctions and disjunctions. SPARQL also supports extensible value testing and constraining queries by source RDF graph. The results of SPARQL queries can be results sets or RDF graphs.},
  author      = {Prud'hommeaux, Eric and Seaborne, Andy},
  institution = {W3C},
  interhash   = {dc198da0f907f0129249cab866bbe3d4},
  intrahash   = {f156278e58de586730e51d791f3b5f69},
  month       = jan,
  title       = {{SPARQL} Query Language for {RDF}},
  type        = {W3C Recommendation},
  url         = {http://www.w3.org/TR/rdf-sparql-query/},
  year        = 2008,
}

% JASIST article on n-gram models of query reformulation.
% Percent signs in the abstract are escaped so the field is safe to typeset.
@article{jansen2009patterns,
  abstract  = {Query reformulation is a key user behavior during Web search. Our research goal is to develop predictive models of query reformulation during Web searching. This article reports results from a study in which we automatically classified the query-reformulation patterns for 964,780 Web searching sessions, composed of 1,523,072 queries, to predict the next query reformulation. We employed an n-gram modeling approach to describe the probability of users transitioning from one query-reformulation state to another to predict their next state. We developed first-, second-, third-, and fourth-order models and evaluated each model for accuracy of prediction, coverage of the dataset, and complexity of the possible pattern set. The results show that Reformulation and Assistance account for approximately 45\% of all query reformulations; furthermore, the results demonstrate that the first- and second-order models provide the best predictability, between 28 and 40\% overall and higher than 70\% for some patterns. Implications are that the n-gram approach can be used for improving searching systems and searching assistance.},
  author    = {Jansen, Bernard J. and Booth, Danielle L. and Spink, Amanda},
  doi       = {10.1002/asi.21071},
  interhash = {c72cb0657de6b51a3dc120521c64626d},
  intrahash = {beb7d932ce7da2665184e5b3d933b8fa},
  issn      = {1532-2890},
  journal   = {Journal of the American Society for Information Science and Technology},
  month     = mar,
  number    = 7,
  pages     = {1358--1371},
  publisher = {Wiley Subscription Services, Inc.},
  title     = {Patterns of query reformulation during {Web} searching},
  url       = {http://dx.doi.org/10.1002/asi.21071},
  volume    = 60,
  year      = 2009,
}

% VLDB 1998 paper; the conference ordinal is "24th" (was misspelled "24rd").
@inproceedings{abiteboul1998incremental,
  abstract  = {Semistructured data is not strictly typed like relational or object-oriented data and may be irregular or incomplete. It often arises in practice, e.g., when heterogeneous data sources are integrated or data is taken from the World Wide Web. Views over semistructured data can be used to filter the data and to restructure (or provide structure to) it. To achieve fast query response time, these views are often materialized. This paper studies incremental maintenance techniques for materialized views over semistructured data. We use the graph-based data model OEM and the query language Lorel, developed at Stanford, as the framework for our work. We propose a new algorithm that produces a set of queries that compute the changes to the view based upon a change to the source. We develop an analytic cost model and compare the cost of executing our incremental maintenance algorithm to that of recomputing the view. We show that for nearly all types of database updates, it is more efficient to apply our incremental maintenance algorithm to the view than to recompute the view from the database, even when there are thousands of such updates.},
  author    = {Abiteboul, S. and McHugh, J. and Rys, M. and Vassalos, V. and Wiener, J.},
  booktitle = {24th International Conference on Very Large Data Bases},
  interhash = {b395f09383de5eb21d34ad8c2b39ab59},
  intrahash = {32903b757b4b4d118c77f4aeac4b0d94},
  month     = aug,
  pages     = {38--49},
  publisher = {Morgan Kaufmann},
  title     = {Incremental Maintenance for Materialized Views over Semistructured Data},
  url       = {http://ilpubs.stanford.edu:8090/340/},
  year      = 1998,
}

% CIDR 2011 paper. The identifier 1721.1/62827 is a Handle, not a DOI, so it
% is kept only inside the url field and no doi field is emitted.
% The abstract was repaired from PDF-extraction damage (quotes, ligatures,
% line-break hyphens).
@inproceedings{marcus2011crowdsourced,
  abstract  = {Amazon's Mechanical Turk (``MTurk'') service allows users to post short tasks (``HITs'') that other users can receive a small amount of money for completing. Common tasks on the system include labelling a collection of images, combining two sets of images to identify people which appear in both, or extracting sentiment from a corpus of text snippets. Designing a workflow of various kinds of HITs for filtering, aggregating, sorting, and joining data sources together is common, and comes with a set of challenges in optimizing the cost per HIT, the overall time to task completion, and the accuracy of MTurk results. We propose Qurk, a novel query system for managing these workflows, allowing crowd-powered processing of relational databases. We describe a number of query execution and optimization challenges, and discuss some potential solutions.},
  author    = {Marcus, Adam and Wu, Eugene and Madden, Samuel and Miller, Robert C.},
  booktitle = {Proceedings of the 5th Biennial Conference on Innovative Data Systems Research},
  interhash = {b6b7d67c3c09259fb2d5df3f52e24c9d},
  intrahash = {29723ba38aa6039091769cd2f69a1514},
  month     = jan,
  pages     = {211--214},
  publisher = {CIDR},
  title     = {Crowdsourced Databases: Query Processing with People},
  url       = {http://dspace.mit.edu/handle/1721.1/62827},
  year      = 2011,
}

% Datenbank-Spektrum article. The doi field is the bare DOI taken from the
% resolver URL; non-ASCII characters are written as LaTeX escapes so the
% entry also works with classic 8-bit BibTeX.
@article{benz2010query,
  abstract  = {Query logs provide a valuable resource for preference information in search. A user clicking on a specific resource after submitting a query indicates that the resource has some relevance with respect to the query. To leverage the information of query logs, one can relate submitted queries from specific users to their clicked resources and build a tripartite graph of users, resources and queries. This graph resembles the folksonomy structure of social bookmarking systems, where users add tags to resources. In this article, we summarize our work on building folksonomies from query log files. The focus is on three comparative studies of the system's content, structure and semantics. Our results show that query logs incorporate typical folksonomy properties and that approaches to leverage the inherent semantics of folksonomies can be applied to query logs as well.},
  author    = {Benz, Dominik and Hotho, Andreas and J{\"a}schke, Robert and Krause, Beate and Stumme, Gerd},
  doi       = {10.1007/s13222-010-0004-8},
  interhash = {dae3931a5f445dc67bf111b26f753c36},
  intrahash = {bf96c01262d15fb6eaaf558ecb9a9e69},
  journal   = {Datenbank-Spektrum},
  month     = jun,
  number    = 1,
  pages     = {15--24},
  title     = {Query Logs as Folksonomies},
  url       = {http://dx.doi.org/10.1007/s13222-010-0004-8},
  volume    = 10,
  year      = 2010,
}