% Industry keynote talk by Ron Kohavi (12 Sep 2012). There is no standard
% entry type for presentations, so the talk is stored as misc and its
% description is carried in howpublished.
@misc{kohavi2012online,
  author       = {Kohavi, Ron},
  title        = {Online Controlled Experiments: Introduction, Learnings, and Humbling Statistics},
  howpublished = {Industry keynote at ACM Recommender Systems},
  year         = 2012,
  month        = sep,
  day          = 12,
  url          = {http://www.exp-platform.com/Pages/2012RecSys.aspx},
  abstract     = {The web provides an unprecedented opportunity to accelerate innovation by
                  evaluating ideas quickly and accurately using controlled experiments (e.g.,
                  A/B tests and their generalizations). Whether for front-end user-interface
                  changes, or backend recommendation systems and relevance algorithms, online
                  controlled experiments are now utilized to make data-driven decisions at
                  Amazon, Microsoft, eBay, Facebook, Google, Yahoo, Zynga, and at many other
                  companies. While the theory of a controlled experiment is simple, and dates
                  back to Sir Ronald A. Fisher's experiments at the Rothamsted Agricultural
                  Experimental Station in England in the 1920s, the deployment and mining of
                  online controlled experiments at scale---thousands of experiments
                  now---has taught us many lessons. We provide an introduction, share real
                  examples, key learnings, cultural challenges, and humbling statistics.},
  interhash    = {36a473c449c5ede0589c2801781a0579},
  intrahash    = {aa31e13651d5d1eab42e449e55a0e745},
}

% Conference paper in the ECAI 2010 proceedings (IOS Press book series, volume 215).
% The abstract's inline definition of crowdsourcing was a footnote in the source
% text; it is kept as a parenthetical after the sentence it annotates.
@inproceedings{brew2010using,
  author    = {Brew, Anthony and Greene, Derek and Cunningham, P{\'a}draig},
  editor    = {Coelho, Helder and Studer, Rudi and Wooldridge, Michael},
  title     = {Using Crowdsourcing and Active Learning to Track Sentiment in Online Media},
  booktitle = {Proceedings of the 19th European Conference on Artificial Intelligence},
  series    = {Frontiers in Artificial Intelligence and Applications},
  volume    = 215,
  pages     = {145--150},
  numpages  = {6},
  publisher = {IOS Press},
  address   = {Amsterdam, The Netherlands},
  year      = 2010,
  isbn      = {978-1-60750-605-8},
  acmid     = {1860997},
  url       = {http://dl.acm.org/citation.cfm?id=1860967.1860997},
  abstract  = {Tracking sentiment in the popular media has long been of interest to media
               analysts and pundits. With the availability of news content via online
               syndicated feeds, it is now possible to automate some aspects of this
               process. There is also great potential to crowdsource much of the annotation
               work that is required to train a machine learning system to perform
               sentiment scoring. (Crowdsourcing is a term, sometimes associated with Web
               2.0 technologies, that describes outsourcing of tasks to a large often
               anonymous community.) We describe such a system for tracking economic
               sentiment in online media that has been deployed since August 2009. It uses
               annotations provided by a cohort of non-expert annotators to train a
               learning system to classify a large body of news items. We report on the
               design challenges addressed in managing the effort of the annotators and in
               making annotation an interesting experience.},
  interhash = {90650749ea1084b729710d37b5865b72},
  intrahash = {9643e3c5729886b0b4e85cb3d3d704f5},
}

% Conference paper in the WWW 2007 proceedings (ACM).
% NOTE(review): address holds the publisher city and location the conference
% venue; biblatex treats address as an alias of location, so confirm which value
% should win if this file is processed with Biber.
@inproceedings{ahn2007topological,
  author    = {Ahn, Yong-Yeol and Han, Seungyeop and Kwak, Haewoon and Moon, Sue and Jeong, Hawoong},
  title     = {Analysis of Topological Characteristics of Huge Online Social Networking Services},
  booktitle = {Proceedings of the 16th International Conference on World Wide Web},
  pages     = {835--844},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Banff, Alberta, Canada},
  year      = 2007,
  isbn      = {978-1-59593-654-7},
  doi       = {10.1145/1242572.1242685},
  url       = {http://portal.acm.org/citation.cfm?id=1242685},
  abstract  = {Social networking services are a fast-growing business in the Internet.
               However, it is unknown if online relationships and their growth patterns are
               the same as in real-life social networks. In this paper, we compare the
               structures of three online social networking services: Cyworld, MySpace, and
               orkut, each with more than 10 million users, respectively. We have access to
               complete data of Cyworld's ilchon (friend) relationships and analyze its
               degree distribution, clustering property, degree correlation, and evolution
               over time. We also use Cyworld data to evaluate the validity of snowball
               sampling method, which we use to crawl and obtain partial network topologies
               of MySpace and orkut. Cyworld, the oldest of the three, demonstrates a
               changing scaling behavior over time in degree distribution. The latest
               Cyworld data's degree distribution exhibits a multi-scaling behavior, while
               those of MySpace and orkut have simple scaling behaviors with different
               exponents. Very interestingly, each of the two exponents corresponds to the
               different segments in Cyworld's degree distribution. Certain online social
               networking services encourage online activities that cannot be easily copied
               in real life; we show that they deviate from close-knit online social
               networks which show a similar degree correlation pattern to real-life social
               networks.},
  interhash = {444ffef9e7a5b4255d78f26f0409864d},
  intrahash = {80928579cc079e0e27c8a28b23a300b7},
}