@comment{
  Six references, one field per line.
  The interhash / intrahash / acmid / ee / numpages fields are not standard
  BibTeX fields and are ignored by the standard styles; they are kept
  verbatim. They are presumably identifiers written by the exporting tool,
  so confirm before removing them.
  Words wrapped in braces inside title / booktitle are protected from
  style-driven recasing.
}

@inproceedings{peters2011crowdsourcing,
  abstract  = {Qualitative journal evaluation makes use of cumulated content descriptions of single articles. These can either be represented by author-generated keywords, professionally indexed subject headings, automatically extracted terms or by reader-generated tags as used in social bookmarking systems. It is assumed that particularly the users' view on article content differs significantly from the authors' or indexers' perspectives. To verify this assumption, title and abstract terms, author keywords, Inspec subject headings, KeyWords PlusTM and tags are compared by calculating the overlap between the respective datasets. Our approach includes extensive term preprocessing (i.e. stemming, spelling unifications) to gain a homogeneous term collection. When term overlap is calculated for every single document of the dataset, similarity values are low. Thus, the presented study confirms the assumption, that the different types of keywords each reflect a different perspective of the articles' contents and that tags (cumulated across articles) can be used in journal evaluation to represent a reader-specific view on published content.},
  author    = {Peters, Isabella and Haustein, Stefanie and Terliesner, Jens},
  booktitle = {ACM WebSci'11},
  interhash = {def78a2b12565187bcac0cf08089b7a1},
  intrahash = {8e03cf8d57f903da395c07e9a9125f08},
  month     = jun,
  note      = {WebSci Conference 2011},
  pages     = {1--4},
  title     = {Crowdsourcing in Article Evaluation},
  url       = {http://journal.webscience.org/487/},
  year      = {2011}
}

@inproceedings{liu2011browsing,
  abstract  = {To optimize the performance of web crawlers, various page importance measures have been studied to select and order URLs in crawling. Most sophisticated measures (e.g. breadth-first and PageRank) are based on link structure. In this paper, we treat the problem from another perspective and propose to measure page importance through mining user interest and behaviors from web browse logs. Unlike most existing approaches which work on single URL, in this paper, both the log mining and the crawl ordering are performed at the granularity of URL pattern. The proposed URL pattern-based crawl orderings are capable to properly predict the importance of newly created (unseen) URLs. Promising experimental results proved the feasibility of our approach.},
  acmid     = {2063593},
  address   = {New York, NY, USA},
  author    = {Liu, Minghai and Cai, Rui and Zhang, Ming and Zhang, Lei},
  booktitle = {Proceedings of the 20th {ACM} International Conference on Information and Knowledge Management},
  doi       = {10.1145/2063576.2063593},
  interhash = {7b45567cb6a492d8354dc32401549291},
  intrahash = {3ce89bd8a3d3eb6306b739fe1f4088df},
  isbn      = {978-1-4503-0717-8},
  location  = {Glasgow, Scotland, UK},
  numpages  = {6},
  pages     = {87--92},
  publisher = {ACM},
  title     = {User browsing behavior-driven web crawling},
  url       = {http://doi.acm.org/10.1145/2063576.2063593},
  year      = {2011}
}

@inproceedings{bai2011discovering,
  abstract  = {Search engines rely upon crawling to build their Web page collections. A Web crawler typically discovers new URLs by following the link structure induced by links on Web pages. As the number of documents on the Web is large, discovering newly created URLs may take arbitrarily long, and depending on how a given page is connected to others, such a crawler may miss the pages altogether. In this paper, we evaluate the benefits of integrating a passive URL discovery mechanism into a Web crawler. This mechanism is passive in the sense that it does not require the crawler to actively fetch documents from the Web to discover URLs. We focus here on a mechanism that uses toolbar data as a representative source for new URL discovery. We use the toolbar logs of Yahoo! to characterize the URLs that are accessed by users via their browsers, but not discovered by Yahoo! Web crawler. We show that a high fraction of URLs that appear in toolbar logs are not discovered by the crawler. We also reveal that a certain fraction of URLs are discovered by the crawler later than the time they are first accessed by users. One important conclusion of our work is that web search engines can highly benefit from user feedback in the form of toolbar logs for passive URL discovery.},
  acmid     = {2063592},
  address   = {New York, NY, USA},
  author    = {Bai, Xiao and Cambazoglu, B. Barla and Junqueira, Flavio P.},
  booktitle = {Proceedings of the 20th {ACM} International Conference on Information and Knowledge Management},
  doi       = {10.1145/2063576.2063592},
  interhash = {dfef0e1af73b9c9e5096a2118368ad21},
  intrahash = {4e73c9d6ed79931ccdfcfda938e3be62},
  isbn      = {978-1-4503-0717-8},
  location  = {Glasgow, Scotland, UK},
  numpages  = {10},
  pages     = {77--86},
  publisher = {ACM},
  title     = {Discovering {URLs} through user feedback},
  url       = {http://doi.acm.org/10.1145/2063576.2063592},
  year      = {2011}
}

@mastersthesis{olson2012cloud,
  abstract  = {My thesis describes the design and implementation of systems that empower individuals to help their communities respond to critical situations and to participate in research that helps them understand and improve their environments. People want to help their communities respond to threats such as earthquakes, wildfires, mudslides and hurricanes, and they want to participate in research that helps them understand and improve their environment. ``Citizen Science'' projects that facilitate this interaction include projects that monitor climate change, water quality and animal habitats. My thesis explores the design and analysis of community-based sense and response systems that enable individuals to participate in critical community activities and scientific research that monitors their environments.},
  author    = {Olson, Michael J.},
  interhash = {a9cdee464e76cd5210c13d7f66981e83},
  intrahash = {d9e22a1a5e9404a805aee5cb0fd406c4},
  school    = {California Institute of Technology},
  title     = {Cloud computing for citizen science},
  type      = {Master's thesis},
  url       = {http://resolver.caltech.edu/CaltechTHESIS:08232011-122341638},
  year      = {2012}
}

@inproceedings{hristova2012mapping,
  abstract  = {Communities of people are better mappers if they are spatially clustered, as revealed in an interesting new paper by Hristova, Mashhadi, Quattrone and Capra from UCL. ``This preliminary analysis inspires further inquiry because it shows a clear correlation between spatial affiliation, the internal community structure and the community's engagement in terms of coverage'', according to the authors. They have studied the similarity patterns among eight hundred contributors to OpenStreetMap, the well-known crowdmapping project and detected the hidden community structure. It is a very promising field of research, coupling a social network analysis of crowdsourced data. Participants to such projects are rarely independent individuals: in most cases, they involve communities more than single participants and it would be crucial to uncover how the underlying social structure reflects on the quantity and the quality of the collected data. It has the greatest relevance for citizen science projects, as data quality is often the key issue determining the success or the failure of the collective effort.},
  author    = {Hristova, Desislava and Mashhadi, Afra and Quattrone, Giovanni and Capra, Licia},
  booktitle = {Proc. When the City Meets the Citizen Workshop (WCMCW)},
  interhash = {373e02fe56d30b26261a33135e0b7a45},
  intrahash = {f0a69ac56b94a471b470ebd56545fafd},
  month     = jun,
  title     = {Mapping Community Engagement with Urban Crowd-Sourcing},
  url       = {http://www.cs.ucl.ac.uk/staff/l.capra/publications/wcmcw12.pdf},
  year      = {2012}
}

@comment{
  In the entry below the last author is written with a braced surname so
  that the three words Van den Broeck stay together as the family name.
  The eprint fields repeat the arXiv identifier that also appears in the
  ee and volume fields.
}

@article{journals/corr/abs-1006-1260,
  author     = {Isella, Lorenzo and Stehl{\'e}, Juliette and Barrat, Alain and Cattuto, Ciro and Pinton, Jean-Fran{\c{c}}ois and {Van den Broeck}, Wouter},
  ee         = {http://arxiv.org/abs/1006.1260},
  eprint     = {1006.1260},
  eprinttype = {arXiv},
  interhash  = {4a20da6d41e4c1e86e8c04c47b22237c},
  intrahash  = {53c0555c19fbfd6af5952e2a3abcbdd2},
  journal    = {CoRR},
  note       = {informal publication},
  title      = {What's in a crowd? {Analysis} of face-to-face behavioral networks},
  url        = {http://dblp.uni-trier.de/db/journals/corr/corr1006.html#abs-1006-1260},
  volume     = {abs/1006.1260},
  year       = {2010}
}