% Bibliography entries (BibSonomy-style export: fields in alphabetical order;
% interhash/intrahash/acmid/numpages are tool-specific fields that standard
% styles ignore). Text outside entries, like these lines, is skipped by BibTeX.

% Brew, Greene and Cunningham (2010): conference paper published by IOS Press
% in the series "Frontiers in Artificial Intelligence and Applications", vol. 215.
% The accented given name is written as a braced LaTeX escape so that it is
% handled as a single letter by 8-bit BibTeX.
@inproceedings{brew2010using,
  abstract  = {Tracking sentiment in the popular media has long been of interest
               to media analysts and pundits. With the availability of news
               content via online syndicated feeds, it is now possible to
               automate some aspects of this process. There is also great
               potential to crowdsource (crowdsourcing is a term, sometimes
               associated with Web 2.0 technologies, that describes outsourcing
               of tasks to a large often anonymous community) much of the
               annotation work that is required to train a machine learning
               system to perform sentiment scoring. We describe such a system
               for tracking economic sentiment in online media that has been
               deployed since August 2009. It uses annotations provided by a
               cohort of non-expert annotators to train a learning system to
               classify a large body of news items. We report on the design
               challenges addressed in managing the effort of the annotators
               and in making annotation an interesting experience.},
  acmid     = {1860997},
  address   = {Amsterdam, The Netherlands},
  author    = {Brew, Anthony and Greene, Derek and Cunningham, P{\'a}draig},
  booktitle = {Proceedings of the 19th European Conference on Artificial Intelligence},
  editor    = {Coelho, Helder and Studer, Rudi and Wooldridge, Michael},
  interhash = {90650749ea1084b729710d37b5865b72},
  intrahash = {9643e3c5729886b0b4e85cb3d3d704f5},
  isbn      = {978-1-60750-605-8},
  numpages  = {6},
  pages     = {145--150},
  publisher = {IOS Press},
  series    = {Frontiers in Artificial Intelligence and Applications},
  title     = {Using Crowdsourcing and Active Learning to Track Sentiment in Online Media},
  url       = {http://dl.acm.org/citation.cfm?id=1860967.1860997},
  volume    = {215},
  year      = {2010},
}

% Bullock, Lerch, Rossnagel, Hotho and Stumme (2011): conference paper in the
% ACM series "i-KNOW '11"; pages are given in article-number form (15:1--15:8).
% The sharp s in the third author's surname is written {\ss}: the braces end
% the control sequence before the following letters.
@inproceedings{bullock2011privacyaware,
  abstract  = {With the increased popularity of Web 2.0 services in the last
               years data privacy has become a major concern for users. The
               more personal data users reveal, the more difficult it becomes
               to control its disclosure in the web. However, for Web 2.0
               service providers, the data provided by users is a valuable
               source for offering effective, personalised data mining
               services. One major application is the detection of spam in
               social bookmarking systems: in order to prevent a decrease of
               content quality, providers need to distinguish spammers and
               exclude them from the system. They thereby experience a conflict
               of interests: on the one hand, they need to identify spammers
               based on the information they collect about users, on the other
               hand, they need to respect privacy concerns and process as few
               personal data as possible. It would therefore be of tremendous
               help for system developers and users to know which personal data
               are needed for spam detection and which can be ignored. In this
               paper we address these questions by presenting a data privacy
               aware feature engineering approach. It consists of the design of
               features for spam classification which are evaluated according
               to both, performance and privacy conditions. Experiments using
               data from the social bookmarking system BibSonomy show that both
               conditions must not exclude each other.},
  acmid     = {2024306},
  address   = {New York, NY, USA},
  articleno = {15},
  author    = {Bullock, Beate Navarro and Lerch, Hana and Ro{\ss}nagel, Alexander and Hotho, Andreas and Stumme, Gerd},
  booktitle = {Proceedings of the 11th International Conference on Knowledge Management and Knowledge Technologies},
  doi       = {10.1145/2024288.2024306},
  interhash = {7a2d6a35c124ea0fe31c962f8f150916},
  intrahash = {00a8f31185a34957eb16d500d7d51398},
  isbn      = {978-1-4503-0732-1},
  location  = {Graz, Austria},
  numpages  = {8},
  pages     = {15:1--15:8},
  publisher = {ACM},
  series    = {i-KNOW '11},
  title     = {Privacy-aware spam detection in social bookmarking systems},
  url       = {http://doi.acm.org/10.1145/2024288.2024306},
  year      = {2011},
}

% Narayanan and Shmatikov (2008): conference paper published by the IEEE
% Computer Society. The month uses the predefined bare macro (may), and the
% acronym in the booktitle is brace-protected against recasing.
@inproceedings{narayanan2008robust,
  abstract  = {We present a new class of statistical de-anonymization attacks
               against high-dimensional micro-data, such as individual
               preferences, recommendations, transaction records and so on. Our
               techniques are robust to perturbation in the data and tolerate
               some mistakes in the adversary's background knowledge. We apply
               our de-anonymization methodology to the Netflix Prize dataset,
               which contains anonymous movie ratings of 500,000 subscribers of
               Netflix, the world's largest online movie rental service. We
               demonstrate that an adversary who knows only a little bit about
               an individual subscriber can easily identify this subscriber's
               record in the dataset. Using the Internet Movie Database as the
               source of background knowledge, we successfully identified the
               Netflix records of known users, uncovering their apparent
               political preferences and other potentially sensitive
               information.},
  author    = {Narayanan, Arvind and Shmatikov, Vitaly},
  booktitle = {Proceedings of the 29th {IEEE} Symposium on Security and Privacy},
  doi       = {10.1109/SP.2008.33},
  interhash = {77c86be6c4bf7fc51b7faecfe85479fe},
  intrahash = {2748ba4684dbe09120aee56c6a0a9de9},
  issn      = {1081-6011},
  month     = may,
  pages     = {111--125},
  publisher = {IEEE Computer Society},
  title     = {Robust De-anonymization of Large Sparse Datasets},
  url       = {http://www.cs.utexas.edu/~shmat/shmat_oak08netflix.pdf},
  year      = {2008},
}