% Bibliography database, one field per line.
% Conventions used below:
%   - page ranges use a double hyphen (61--74)
%   - month uses the standard three-letter macros, unquoted (jan ... dec)
%   - doi holds the bare identifier, without a resolver prefix
%   - fields such as interhash, intrahash, asin, ean, dewey, vgwort, ee,
%     bibsource, priority and timestamp are record metadata; standard styles
%     ignore unknown fields
%   - timestamp holds a value that was formerly stored in a date field but
%     disagreed with the publication year (presumably a record modification
%     date -- confirm against the exporting database)

@article{hotho2010ubiquitous,
  author = {Hotho, Andreas and {Ulslev Pedersen}, Rasmus and Wurst, Michael},
  doi = {10.1007/978-3-642-16392-0_4},
  interhash = {e779fb5dff41b65bce1aa38fdca4a376},
  intrahash = {56f2940d5d0f2ce59c342d3b8ad42ca1},
  issn = {0302-9743},
  journal = {Lecture Notes in Computer Science},
  pages = {61--74},
  publisher = {Springer},
  title = {Ubiquitous Data},
  url = {http://rd.springer.com/content/pdf/10.1007%2F978-3-642-16392-0_4.pdf},
  volume = 6202,
  year = 2010,
}

@article{morstatter2013sample,
  author = {Morstatter, Fred and Pfeffer, J{\"u}rgen and Liu, Huan and Carley, Kathleen M.},
  interhash = {bca742d25a5f5fa43c8f106460449b5b},
  intrahash = {58707a28cc5098b9b3444501d5ca9a88},
  title = {Is the Sample Good Enough? Comparing Data from Twitter’s Streaming API with Twitter’s Firehose},
  url = {http://scholar.google.de/scholar.bib?q=info:NkS2afIrqyQJ:scholar.google.com/&output=citation&hl=de&as_sdt=0,5&ct=citation&cd=0},
  year = 2013,
}

@article{RePEc:eee:csdana:v:41:y:2002:i:1:p:59-90,
  author = {Dhillon, Inderjit S. and Modha, Dharmendra S. and Spangler, W. Scott},
  interhash = {3ff82dddf6ce4d86909347824554ddf8},
  intrahash = {03e92f40796a0093a6e882a83f5cd995},
  journal = {Computational Statistics \& Data Analysis},
  month = nov,
  number = 1,
  pages = {59--90},
  title = {Class visualization of high-dimensional data with applications},
  url = {http://www.cs.utexas.edu/~inderjit/public_papers/csda.pdf},
  volume = 41,
  year = 2002,
}

@inproceedings{bullock2011tagging,
  author = {Bullock, Beate Navarro and Jäschke, Robert and Hotho, Andreas},
  booktitle = {Proceedings of the ACM WebSci'11},
  interhash = {7afaa67dfeb07f7e0b85abf2be61aff1},
  intrahash = {493e03868a98f498628cad31f9320e9f},
  month = jun,
  title = {Tagging data as implicit feedback for learning-to-rank},
  url = {http://journal.webscience.org/463/},
  year = 2011,
}

@article{Song19022010,
  abstract = {A range of applications, from predicting the spread of human and electronic viruses to city planning and resource management in mobile communications, depend on our ability to foresee the whereabouts and mobility of individuals, raising a fundamental question: To what degree is human behavior predictable? Here we explore the limits of predictability in human dynamics by studying the mobility patterns of anonymized mobile phone users. By measuring the entropy of each individual’s trajectory, we find a 93\% potential predictability in user mobility across the whole user base. Despite the significant differences in the travel patterns, we find a remarkable lack of variability in predictability, which is largely independent of the distance users cover on a regular basis.},
  author = {Song, Chaoming and Qu, Zehui and Blumm, Nicholas and Barabási, Albert-László},
  doi = {10.1126/science.1177170},
  eprint = {http://www.sciencemag.org/content/327/5968/1018.full.pdf},
  interhash = {f2611a08bf6db54f86e884c05f3cb5fb},
  intrahash = {a89330f8eb32ce62b5f5c9a2b4909f25},
  journal = {Science},
  number = 5968,
  pages = {1018--1021},
  title = {Limits of Predictability in Human Mobility},
  url = {http://www.sciencemag.org/content/327/5968/1018.abstract},
  volume = 327,
  year = 2010,
}

@inproceedings{atze09,
  address = {Krakow, Poland},
  author = {Atzmueller, Martin and Lemmerich, Florian and Krause, Beate and Hotho, Andreas},
  booktitle = {7th Conference on Computer Methods and Systems},
  interhash = {c226a55c0cc2dc6f261b86c09225c260},
  intrahash = {014dbd07807e05a5ea9aafb2dbead39b},
  month = nov,
  note = {ISBN 83-916420-5-4},
  title = {Who are the Spammers? Understandable Local Patterns for Concept Description},
  url = {http://www.cms.agh.edu.pl/},
  year = 2009,
}

@inproceedings{ALKH:09,
  author = {Atzmueller, Martin and Lemmerich, Florian and Krause, Beate and Hotho, Andreas},
  booktitle = {Proc. LeGo-09: From Local Patterns to Global Models, Workshop at the 2009 European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases},
  editor = {F{\"u}rnkranz, Johannes and Knobbe, Arno},
  interhash = {d27cd7eee4ab571ad3753a3d370141ce},
  intrahash = {bb80bdcc06c8886968c453fd920dfe05},
  note = {accepted},
  title = {Towards Understanding Spammers - Discovering Local Patterns for Concept Characterization and Description},
  url = {http://www.ke.tu-darmstadt.de/events/LeGo-09/04-Atzmueller.pdf},
  year = 2009,
}

@inproceedings{anti2008krause,
  address = {New York, NY, USA},
  author = {Krause, Beate and Schmitz, Christoph and Hotho, Andreas and Stumme, Gerd},
  booktitle = {AIRWeb '08: Proceedings of the 4th international workshop on Adversarial information retrieval on the web},
  doi = {10.1145/1451983.1451998},
  interhash = {a45d40ac7776551301ad9dde5b25357f},
  intrahash = {68effe5d4b9460f9388e7685310f74c2},
  isbn = {978-1-60558-159-0},
  location = {Beijing, China},
  pages = {61--68},
  publisher = {ACM},
  title = {The Anti-Social Tagger - Detecting Spam in Social Bookmarking Systems},
  url = {http://airweb.cse.lehigh.edu/2008/submissions/krause_2008_anti_social_tagger.pdf},
  year = 2008,
}

@incollection{books/mit/fayyadPSU96/FayyadPS96,
  author = {Fayyad, Usama M. and Piatetsky-Shapiro, Gregory and Smyth, Padhraic},
  booktitle = {Advances in Knowledge Discovery and Data Mining},
  interhash = {79663e4b1f464b82ce1ae45345dc424f},
  intrahash = {e59886c68d1fc9bb4d1a8d6a1a644a60},
  pages = {1--34},
  timestamp = {2002-01-03},
  title = {From Data Mining to Knowledge Discovery: An Overview},
  url = {http://dblp.uni-trier.de/db/books/collections/fayyad96.html#FayyadPS96},
  year = 1996,
}

@article{keyhere,
  abstract = {The identification of the user’s intention or interest through queries that they submit to a search engine can be very useful to offer them more adequate results. In this work we present a framework for the identification of user’s interest in an automatic way, based on the analysis of query logs. This identification is made from two perspectives, the objectives or goals of a user and the categories in which these aims are situated. A manual classification of the queries was made in order to have a reference point and then we applied supervised and unsupervised learning techniques. The results obtained show that for a considerable amount of cases supervised learning is a good option, however through unsupervised learning we found relationships between users and behaviors that are not easy to detect just taking the query words. Also, through unsupervised learning we established that there are categories that we are not able to determine in contrast with other classes that were not considered but naturally appear after the clustering process. This allowed us to establish that the combination of supervised and unsupervised learning is a good alternative to find user’s goals. From supervised learning we can identify the user interest given certain established goals and categories; on the other hand, with unsupervised learning we can validate the goals and categories used, refine them and select the most appropriate to the user’s needs.},
  author = {Baeza-Yates, Ricardo and Calderón-Benavides, Liliana and González-Caro, Cristina},
  doi = {10.1007/11880561_9},
  interhash = {92e5f2f5208b5ce2f066dd361ae15758},
  intrahash = {27c7357d3337d890fef53168dce9ed33},
  journal = {String Processing and Information Retrieval},
  pages = {98--109},
  title = {The Intention Behind Web Queries},
  url = {http://dx.doi.org/10.1007/11880561_9},
  year = 2006,
}

@book{0387954333,
  address = {Berlin},
  asin = {0387954333},
  author = {Weiss, Sholom M. and Indurkhya, Nitin and Zhang, T.},
  dewey = {006.312},
  ean = {9780387954332},
  edition = 1,
  interhash = {d75b9da07cf40d54a79e6d8995f78a31},
  intrahash = {6ac07561b543e6033fd4c9811d0dccad},
  isbn = {0387954333},
  publisher = {Springer},
  title = {Text Mining. Predictive Methods for Analyzing Unstructured Information},
  url = {http://www.amazon.de/gp/redirect.html%3FASIN=0387954333%26tag=ws%26lcode=xm2%26cID=2025%26ccmID=165953%26location=/o/ASIN/0387954333%253FSubscriptionId=13CT5CVB80YFWJEPWS02},
  year = 2004,
}

% eventdate is the workshop date stated in the abstract; the volume itself is
% dated by the year field.
@book{Berendt2007,
  abstract = {This book constitutes the refereed proceedings of the Workshop on Web Mining, WebMine 2006, held in Berlin, Germany, September 18th, 2006. Topics included are data mining based on analysis of bloggers and tagging, web mining, XML mining and further techniques of knowledge discovery. The book is especially valuable for those interested in the aspects of the Social Web (Web 2.0) and its inherent dynamic and diversity of user-generated content.},
  editor = {Berendt, B. and Hotho, A. and Mladenic, D. and Semeraro, G.},
  eventdate = {2006-09-18},
  interhash = {44843d4fe175e66198b58137dd924f44},
  intrahash = {8aa8d9bcbb5a5bb3fc480d1e53b27236},
  isbn = {978-3-540-74950-9},
  location = {Berlin, Germany},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  title = {From Web to Social Web: Discovering and Deploying User and Content Profiles},
  url = {http://www.springer.com/dal/home?SGWID=1-102-22-173759307-0&changeHeader=true&referer=www.springeronline.com&SHORTCUT=www.springer.com/978-3-540-74950-9},
  vgwort = {279},
  volume = 4736,
  year = 2007,
}

@article{1276056,
  address = {Amsterdam, The Netherlands},
  author = {Wurst, Michael and Morik, Katharina},
  doi = {10.1016/j.future.2006.04.004},
  interhash = {591e4b107d4a5cafc0cf49e07fad72e0},
  intrahash = {e5eba80e58b4532a3fd3bcf50994734e},
  issn = {0167-739X},
  journal = {Future Gener. Comput. Syst.},
  number = 1,
  pages = {69--75},
  publisher = {Elsevier Science Publishers B. V.},
  title = {Distributed feature extraction in a p2p setting: a case study},
  url = {http://portal.acm.org/citation.cfm?id=1276056},
  volume = 23,
  year = 2007,
}

@article{flajolet85probabilistic,
  author = {Flajolet, Philippe and Martin, G. Nigel},
  interhash = {e937032c1207bd067f622628c469aeba},
  intrahash = {7ccb5baa7d4e0836127b21468c92a27d},
  journal = {Journal of Computer and System Sciences},
  number = 2,
  pages = {182--209},
  title = {Probabilistic Counting Algorithms for Data Base Applications},
  url = {http://citeseer.ist.psu.edu/flajolet85probabilistic.html},
  volume = 31,
  year = 1985,
}

% The crossref target conf/ACMse/2006 is not defined in this part of the file;
% verify that the parent entry exists elsewhere.
@inproceedings{conf/ACMse/BalakrishnanD06,
  author = {Balakrishnan, Hemant and Deo, Narsingh},
  booktitle = {ACM Southeast Regional Conference},
  crossref = {conf/ACMse/2006},
  doi = {10.1145/1185448.1185512},
  editor = {Menezes, Ronaldo},
  ee = {http://doi.acm.org/10.1145/1185448.1185512},
  interhash = {e5777f1470aec341ee766ab04febb69a},
  intrahash = {7efa21cb8537359f6995cde9c307d181},
  isbn = {1-59593-315-8},
  pages = {280--285},
  publisher = {ACM},
  timestamp = {2006-12-18},
  title = {Discovering communities in complex networks},
  url = {http://www.cs.ucf.edu/csdept/faculty/deo/ACMSE-06.pdf},
  year = 2006,
}

@book{thrun2001,
  asin = {0262201623},
  author = {Thrun, Sebastian and Burgard, Wolfram and Fox, Dieter},
  interhash = {f1b5ed6b916d753f4e24749056ab745e},
  intrahash = {914a56f048c863f0928bb6d1efe09ff7},
  isbn = {978-0-262-20162-9},
  title = {Probabilistic Robotics (Intelligent Robotics and Autonomous Agents)},
  typesource = {Simple CitationSource},
  url = {http://www.amazon.com/Probabilistic-Robotics-Intelligent-Autonomous-Agents/dp/0262201623/ref=sr_11_1/105-3361811-4085215?ie=UTF8&qid=1190743235&sr=11-1},
  year = 2001,
}

@book{buitelaar05ontologylearningbook,
  editor = {Buitelaar, Paul and Cimiano, Philipp and Magnini, Bernardo},
  interhash = {9a5beec1eb7d58ead91f134915be86ab},
  intrahash = {0e71ddd52894af0e681b9d9411f7944f},
  month = jul,
  publisher = {IOS Press},
  series = {Frontiers in Artificial Intelligence},
  title = {Ontology Learning from Text: Methods, Evaluation and Applications},
  volume = 123,
  year = 2005,
}

@inbook{baldi03modelling,
  abstract = {Modeling the Internet and the Web covers the most important aspects of modeling the Web using a modern mathematical and probabilistic treatment. It focuses on the information and application layers, as well as some of the emerging properties of the Internet.  Provides a comprehensive introduction to the modeling of the Internet and the Web at the information level.  Takes a modern approach based on mathematical, probabilistic, and graphical modeling.  Provides an integrated presentation of theory, examples, exercises and applications.  Covers key topics such as text analysis, link analysis, crawling techniques, human behaviour, and commerce on the Web. Interdisciplinary in nature, Modeling the Internet and the Web will be of interest to students and researchers from a variety of disciplines including computer science, machine learning, engineering, statistics, economics, business, and the social sciences.},
  author = {Baldi, Pierre and Frasconi, Paolo and Smyth, Padhraic},
  booktitle = {Modeling the Internet and the Web: Probabilistic Methods and Algorithms},
  citeulike-article-id = {822915},
  interhash = {416f2405193ae7d30cffe673dee89df2},
  intrahash = {3e4e2899e7d6988218d02a264bcfe24a},
  month = apr,
  priority = {2},
  publisher = {Wiley},
  title = {Modeling the Internet and the Web: Probabilistic Methods and Algorithms},
  url = {http://eu.wiley.com/WileyCDA/WileyTitle/productCd-0470849061.html},
  year = 2003,
}

@proceedings{DBLP:conf/kdd/1999web,
  bibsource = {DBLP, http://dblp.uni-trier.de},
  booktitle = {WEBKDD},
  editor = {Masand, Brij M. and Spiliopoulou, Myra},
  interhash = {29a69416c66bd604c4599009915dc0b0},
  intrahash = {18a9697e8ca04f637487e79b6be9cc83},
  isbn = {3-540-67818-2},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  title = {Web Usage Analysis and User Profiling, International WEBKDD'99 Workshop, San Diego, California, USA, August 15, 1999, Revised Papers},
  volume = 1836,
  year = 2000,
}

@book{books/mk/Pyle99,
  author = {Pyle, Dorian},
  interhash = {3edec307e8a02fa778ee847eccfb4215},
  intrahash = {29f6bc4833269393dabf92bae3afa905},
  isbn = {1-55860-529-0},
  publisher = {Morgan Kaufmann},
  timestamp = {2002-01-28},
  title = {Data Preparation for Data Mining},
  year = 1999,
}

@article{Washio:Motoda:03,
  abstract = {The need for mining structured data has increased in the past few years. One of the best studied data structures in computer science and discrete mathematics are graphs. It can therefore be no surprise that graph based data mining has become quite popular in the last few years. This article introduces the theoretical basis of graph based data mining and surveys the state of the art of graph-based data mining. Brief descriptions of some representative approaches are provided as well.},
  address = {New York, NY, USA},
  author = {Washio, Takashi and Motoda, Hiroshi},
  doi = {10.1145/959242.959249},
  interhash = {d2592c51759257bde584dc92196512d0},
  intrahash = {0c5107f6feec6fcdc2e791a0b12ce448},
  journal = {SIGKDD Explorations},
  number = 1,
  pages = {59--68},
  publisher = {ACM Press},
  title = {State of the art of graph-based data mining},
  url = {http://doi.acm.org/10.1145/959242.959249},
  volume = 5,
  year = 2003,
}

@article{wikipediaxml:2005,
  author = {Denoyer, Ludovic and Gallinari, Patrick},
  interhash = {0e9b9afb15804d3e625d73ada85900b1},
  intrahash = {493b849942fcaf9ba8e8e68e3cb46d38},
  journal = {SIGIR Forum},
  title = {The {Wikipedia} {XML} {Corpus}},
  url = {http://www-connex.lip6.fr/~denoyer/wikipediaXML/},
  year = 2006,
}

% Author list reconstructed from a fused name string; the four surnames match
% the citation key. NOTE(review): confirm the initials against the repository's
% own citation request.
@misc{Newman+Hettich+Blake+Merz:1998,
  author = {Newman, D. J. and Hettich, S. and Blake, C. L. and Merz, C. J.},
  institution = {University of California, Irvine, Dept. of Information and Computer Sciences},
  interhash = {325a6b5ac5a1f044943d4298c21d31dd},
  intrahash = {85308db3df761f63f16a7cab4eb8d4aa},
  title = {{UCI} Repository of machine learning databases},
  url = {http://www.ics.uci.edu/~mlearn/MLRepository.html},
  year = 1998,
}

@book{books/mit/FayyadPSU96,
  editor = {Fayyad, Usama M. and Piatetsky-Shapiro, Gregory and Smyth, Padhraic and Uthurusamy, Ramasamy},
  interhash = {c11811ccd720de5dad0ffea4741725f0},
  intrahash = {3553c3acc971c03813352c40afe7476a},
  isbn = {0-262-56097-6},
  publisher = {AAAI/MIT Press},
  title = {Advances in Knowledge Discovery and Data Mining},
  url = {http://www.amazon.com/gp/product/0262560976},
  year = 1996,
}

@article{Fay96,
  author = {Fayyad, U. and Piatetsky-Shapiro, G. and Smyth, P.},
  interhash = {971272cb912769da4f85aab25536354b},
  intrahash = {eb5f2e6742d9520453cbf9d100cacfd2},
  journal = {AI Magazine},
  pages = {37--54},
  title = {From Data Mining to Knowledge Discovery in Databases},
  url = {http://www.kdnuggets.com/gpspubs/aimag-kdd-overview-1996-Fayyad.pdf},
  year = 1996,
}

@book{mitchell97,
  author = {Mitchell, Tom M.},
  interhash = {479a66c32badb3a455fbdcf8e6633a5d},
  intrahash = {3e79734ee1a6e49aee02ffd108224d1c},
  publisher = {McGraw-Hill},
  title = {Machine Learning},
  year = 1997,
}

@article{maitra02,
  author = {Maitra, R.},
  interhash = {7dfd1fe3496527a4253733a06f990fcf},
  intrahash = {5c9b8ae716d306921bb1c64cefa2ab84},
  isabout = {Statistical and Data and Data_Mining and Computing_Methodologies and ACMTopic and Database_Management and Information_Systems and Image_Representation and Database_Applications and Image_Processing_And_Computer_Vision},
  journal = {J. Ind. Soc. Prob. Statist.},
  key = {maitra02},
  label = {A Statistical Perspective on Data Mining},
  title = {A Statistical Perspective on Data Mining},
  type = {Article},
  url = {http://www.public.iastate.edu/~maitra/papers/datamining.pdf},
  year = 2002,
}

% NOTE(review): location (Santa Barbara, CA) is kept as exported but disagrees
% with the publisher city that was embedded in the publisher field; confirm.
@book{Handetal01,
  address = {Cambridge, MA},
  author = {Hand, D. and Mannila, H. and Smyth, P.},
  interhash = {1d7f5d27a1dfea47e1ac543fcb027c0d},
  intrahash = {e2e195a4102d09f2ed4976bf102af01d},
  location = {Santa Barbara, CA},
  publisher = {MIT Press},
  title = {Principles of Data Mining},
  year = 2001,
}