@inproceedings{agichtein2008finding,
  abstract = {The quality of user-generated content varies drastically from excellent to abuse and spam. As the availability of such content increases, the task of identifying high-quality content in sites based on user contributions -- social media sites -- becomes increasingly important. Social media in general exhibit a rich variety of information sources: in addition to the content itself, there is a wide array of non-content information available, such as links between items and explicit quality ratings from members of the community. In this paper we investigate methods for exploiting such community feedback to automatically identify high-quality content. As a test case, we focus on Yahoo! Answers, a large community question/answering portal that is particularly rich in the amount and types of content and social interactions available in it. We introduce a general classification framework for combining the evidence from different sources of information, which can be tuned automatically for a given social media type and quality definition. In particular, for the community question/answering domain, we show that our system is able to separate high-quality items from the rest with an accuracy close to that of humans.},
  acmid = {1341557},
  address = {New York, NY, USA},
  author = {Agichtein, Eugene and Castillo, Carlos and Donato, Debora and Gionis, Aristides and Mishne, Gilad},
  booktitle = {Proceedings of the International Conference on Web Search and Web Data Mining},
  doi = {10.1145/1341531.1341557},
  interhash = {72c7bf5d1c983c47bfc3c6cc9084c26c},
  intrahash = {29c5c74d95dce215a9692b94fc619839},
  isbn = {978-1-59593-927-2},
  location = {Palo Alto, California, USA},
  numpages = {12},
  pages = {183--194},
  publisher = {ACM},
  title = {Finding high-quality content in social media},
  url = {http://doi.acm.org/10.1145/1341531.1341557},
  year = 2008
}

@inproceedings{brew2010using,
  abstract = {Tracking sentiment in the popular media has long been of interest to media analysts and pundits. With the availability of news content via online syndicated feeds, it is now possible to automate some aspects of this process. There is also great potential to crowdsource (crowdsourcing is a term, sometimes associated with Web 2.0 technologies, that describes the outsourcing of tasks to a large, often anonymous community) much of the annotation work that is required to train a machine learning system to perform sentiment scoring. We describe such a system for tracking economic sentiment in online media that has been deployed since August 2009. It uses annotations provided by a cohort of non-expert annotators to train a learning system to classify a large body of news items.
  We report on the design challenges addressed in managing the effort of the annotators and in making annotation an interesting experience.},
  acmid = {1860997},
  address = {Amsterdam, The Netherlands},
  author = {Brew, Anthony and Greene, Derek and Cunningham, Pádraig},
  booktitle = {Proceedings of the 19th European Conference on Artificial Intelligence},
  editor = {Coelho, Helder and Studer, Rudi and Wooldridge, Michael},
  interhash = {90650749ea1084b729710d37b5865b72},
  intrahash = {9643e3c5729886b0b4e85cb3d3d704f5},
  isbn = {978-1-60750-605-8},
  numpages = {6},
  pages = {145--150},
  publisher = {IOS Press},
  series = {Frontiers in Artificial Intelligence and Applications},
  title = {Using Crowdsourcing and Active Learning to Track Sentiment in Online Media},
  url = {http://dl.acm.org/citation.cfm?id=1860967.1860997},
  volume = 215,
  year = 2010
}