% Bibliography database, one field per line.
% The interhash / intrahash / citeulike-article-id / priority / typesource / asin
% fields are not standard bibliography fields; they are kept verbatim as
% bookkeeping data.

% NOTE(review): no journal is recorded for this article entry, and the url
% points at a citation-export link rather than the paper -- confirm the venue.
@article{morstatter2013sample,
  author    = {Morstatter, Fred and Pfeffer, J{\"u}rgen and Liu, Huan and Carley, Kathleen M.},
  interhash = {bca742d25a5f5fa43c8f106460449b5b},
  intrahash = {58707a28cc5098b9b3444501d5ca9a88},
  title     = {Is the Sample Good Enough? Comparing Data from {Twitter’s} Streaming {API} with {Twitter’s} {Firehose}},
  url       = {http://scholar.google.de/scholar.bib?q=info:NkS2afIrqyQJ:scholar.google.com/&output=citation&hl=de&as_sdt=0,5&ct=citation&cd=0},
  year      = 2013
}

@inproceedings{bullock2011tagging,
  author    = {Bullock, Beate Navarro and Jäschke, Robert and Hotho, Andreas},
  booktitle = {Proceedings of the ACM WebSci'11},
  interhash = {7afaa67dfeb07f7e0b85abf2be61aff1},
  intrahash = {493e03868a98f498628cad31f9320e9f},
  month     = jun,
  title     = {Tagging data as implicit feedback for learning-to-rank},
  url       = {http://journal.webscience.org/463/},
  year      = 2011
}

% `address` is the publisher's city; the conference location is stored in
% `venue` so it cannot collide with `address` (biblatex aliases address to
% location). The doi is stored bare, without a resolver prefix.
@inproceedings{anti2008krause,
  address   = {New York, NY, USA},
  author    = {Krause, Beate and Schmitz, Christoph and Hotho, Andreas and Stumme, Gerd},
  booktitle = {AIRWeb '08: Proceedings of the 4th international workshop on Adversarial information retrieval on the web},
  doi       = {10.1145/1451983.1451998},
  interhash = {a45d40ac7776551301ad9dde5b25357f},
  intrahash = {68effe5d4b9460f9388e7685310f74c2},
  isbn      = {978-1-60558-159-0},
  pages     = {61--68},
  publisher = {ACM},
  title     = {The Anti-Social Tagger -- Detecting Spam in Social Bookmarking Systems},
  url       = {http://airweb.cse.lehigh.edu/2008/submissions/krause_2008_anti_social_tagger.pdf},
  venue     = {Beijing, China},
  year      = 2008
}

% The 2002-01-03 value is not the publication date (year is 1996), so it is
% held in the ignored `timestamp` field instead of `date`.
% NOTE(review): incollection entries require `publisher`; none is recorded.
@incollection{books/mit/fayyadPSU96/FayyadPS96,
  author    = {Fayyad, Usama M. and Piatetsky-Shapiro, Gregory and Smyth, Padhraic},
  booktitle = {Advances in Knowledge Discovery and Data Mining},
  interhash = {79663e4b1f464b82ce1ae45345dc424f},
  intrahash = {e59886c68d1fc9bb4d1a8d6a1a644a60},
  pages     = {1--34},
  timestamp = {2002-01-03},
  title     = {From Data Mining to Knowledge Discovery: An Overview},
  url       = {http://dblp.uni-trier.de/db/books/collections/fayyad96.html#FayyadPS96},
  year      = 1996
}

% NOTE(review): the `journal` value reads like a proceedings title -- confirm
% whether this should be an inproceedings entry with `booktitle` instead.
@article{keyhere,
  abstract  = {The identification of the user’s intention or interest through queries that they submit to a search engine can be very useful to offer them more adequate results. In this work we present a framework for the identification of user’s interest in an automatic way, based on the analysis of query logs. This identification is made from two perspectives, the objectives or goals of a user and the categories in which these aims are situated. A manual classification of the queries was made in order to have a reference point and then we applied supervised and unsupervised learning techniques. The results obtained show that for a considerable amount of cases supervised learning is a good option, however through unsupervised learning we found relationships between users and behaviors that are not easy to detect just taking the query words. Also, through unsupervised learning we established that there are categories that we are not able to determine in contrast with other classes that were not considered but naturally appear after the clustering process. This allowed us to establish that the combination of supervised and unsupervised learning is a good alternative to find user’s goals. From supervised learning we can identify the user interest given certain established goals and categories; on the other hand, with unsupervised learning we can validate the goals and categories used, refine them and select the most appropriate to the user’s needs.},
  author    = {Baeza-Yates, Ricardo and Calderón-Benavides, Liliana and González-Caro, Cristina},
  doi       = {10.1007/11880561_9},
  interhash = {92e5f2f5208b5ce2f066dd361ae15758},
  intrahash = {27c7357d3337d890fef53168dce9ed33},
  journal   = {String Processing and Information Retrieval},
  pages     = {98--109},
  title     = {The Intention Behind Web Queries},
  url       = {http://dx.doi.org/10.1007/11880561_9},
  year      = 2006
}

% The series name is kept in `series` rather than in parentheses in the title.
% NOTE(review): book entries require `publisher`; none is recorded here.
@book{thrun2001,
  asin       = {0262201623},
  author     = {Thrun, Sebastian and Burgard, Wolfram and Fox, Dieter},
  interhash  = {f1b5ed6b916d753f4e24749056ab745e},
  intrahash  = {914a56f048c863f0928bb6d1efe09ff7},
  isbn       = {978-0-262-20162-9},
  series     = {Intelligent Robotics and Autonomous Agents},
  title      = {Probabilistic Robotics},
  typesource = {Simple CitationSource},
  url        = {http://www.amazon.com/Probabilistic-Robotics-Intelligent-Autonomous-Agents/dp/0262201623/ref=sr_11_1/105-3361811-4085215?ie=UTF8&qid=1190743235&sr=11-1},
  year       = 2001
}

@book{buitelaar05ontologylearningbook,
  editor    = {Buitelaar, Paul and Cimiano, Philipp and Magnini, Bernardo},
  interhash = {9a5beec1eb7d58ead91f134915be86ab},
  intrahash = {0e71ddd52894af0e681b9d9411f7944f},
  month     = jul,
  publisher = {IOS Press},
  series    = {Frontiers in Artificial Intelligence},
  title     = {Ontology Learning from Text: Methods, Evaluation and Applications},
  volume    = 123,
  year      = 2005
}

% Typed as a book: the record has no chapter or page range, and its title is
% the title of the whole volume.
@book{baldi03modelling,
  abstract             = {Modeling the Internet and the Web covers the most important aspects of modeling the Web using a modern mathematical and probabilistic treatment. It focuses on the information and application layers, as well as some of the emerging properties of the Internet. Provides a comprehensive introduction to the modeling of the Internet and the Web at the information level. Takes a modern approach based on mathematical, probabilistic, and graphical modeling. Provides an integrated presentation of theory, examples, exercises and applications. Covers key topics such as text analysis, link analysis, crawling techniques, human behaviour, and commerce on the Web. Interdisciplinary in nature, Modeling the Internet and the Web will be of interest to students and researchers from a variety of disciplines including computer science, machine learning, engineering, statistics, economics, business, and the social sciences.},
  author               = {Baldi, Pierre and Frasconi, Paolo and Smyth, Padhraic},
  citeulike-article-id = {822915},
  interhash            = {416f2405193ae7d30cffe673dee89df2},
  intrahash            = {3e4e2899e7d6988218d02a264bcfe24a},
  month                = apr,
  priority             = {2},
  publisher            = {Wiley},
  title                = {Modeling the Internet and the Web: Probabilistic Methods and Algorithms},
  url                  = {http://eu.wiley.com/WileyCDA/WileyTitle/productCd-0470849061.html},
  year                 = 2003
}

% Whole words are brace-protected so the corpus name keeps its capitals under
% sentence-casing styles. The key says 2005 while `year` is 2006; the key is
% left unchanged so existing citations keep resolving.
@article{wikipediaxml:2005,
  author    = {Denoyer, Ludovic and Gallinari, Patrick},
  interhash = {0e9b9afb15804d3e625d73ada85900b1},
  intrahash = {493b849942fcaf9ba8e8e68e3cb46d38},
  journal   = {SIGIR Forum},
  title     = {The {Wikipedia} {XML} {Corpus}},
  url       = {http://www-connex.lip6.fr/~denoyer/wikipediaXML/},
  year      = 2006
}

% NOTE(review): the author list was rebuilt as four names to match the
% surnames in the citation key -- verify the initials against the source.
@misc{Newman+Hettich+Blake+Merz:1998,
  author      = {Newman, D. J. and Hettich, S. and Blake, C. L. and Merz, C. J.},
  institution = {University of California, Irvine, Dept. of Information and Computer Sciences},
  interhash   = {325a6b5ac5a1f044943d4298c21d31dd},
  intrahash   = {85308db3df761f63f16a7cab4eb8d4aa},
  title       = {{UCI} Repository of machine learning databases},
  url         = {http://www.ics.uci.edu/~mlearn/MLRepository.html},
  year        = 1998
}

@book{mitchell97,
  author    = {Mitchell, Tom M.},
  interhash = {479a66c32badb3a455fbdcf8e6633a5d},
  intrahash = {3e79734ee1a6e49aee02ffd108224d1c},
  publisher = {McGraw-Hill},
  title     = {Machine Learning},
  year      = 1997
}