% Bibliography database (data mining / web mining references).
% One entry per record, one field per line. The interhash/intrahash fields are
% tool-specific identifiers that standard styles ignore; they are kept verbatim.
% Page ranges use "--", DOIs are stored bare (no resolver prefix), and months
% use the predefined three-letter macros.

% NOTE(review): booktitle carries the bare conference tag that was previously
% stored in the editor field; expand to the full proceedings title if known.
@inproceedings{ring2015condist,
  author = {Ring, Markus and Otto, Florian and Becker, Martin and Niebler, Thomas and Landes, Dieter and Hotho, Andreas},
  booktitle = {ECMLPKDD2015},
  interhash = {c062a57a17a0910d6c27ecd664502ac1},
  intrahash = {a2f9d649f2856677e4d886a3b517404d},
  title = {{ConDist}: A Context-Driven Categorical Distance Measure},
  year = 2015
}

@proceedings{cellier2014proceedings,
  bibsource = {dblp computer science bibliography, http://dblp.org},
  editor = {Cellier, Peggy and Charnois, Thierry and Hotho, Andreas and Matwin, Stan and Moens, Marie{-}Francine and Toussaint, Yannick},
  interhash = {212d282598a034c37510c1c08c4f3a34},
  intrahash = {cfb7265080d484cfda32e1fbdaff361f},
  publisher = {CEUR-WS.org},
  series = {{CEUR} Workshop Proceedings},
  title = {Proceedings of the 1st International Workshop on Interactions between Data Mining and Natural Language Processing co-located with The European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, DMNLP@PKDD/ECML 2014, Nancy, France, September 15, 2014},
  url = {http://ceur-ws.org/Vol-1202},
  volume = 1202,
  year = 2014
}

@article{hotho2010ubiquitous,
  author = {Hotho, Andreas and {Ulslev Pedersen}, Rasmus and Wurst, Michael},
  interhash = {e779fb5dff41b65bce1aa38fdca4a376},
  intrahash = {56f2940d5d0f2ce59c342d3b8ad42ca1},
  issn = {0302-9743},
  journal = {Lecture Notes in Computer Science},
  number = 6202,
  pages = {61--74},
  publisher = {Springer},
  title = {Ubiquitous Data},
  url = {http://rd.springer.com/content/pdf/10.1007%2F978-3-642-16392-0_4.pdf},
  year = 2010
}

@book{han2012mining,
  address = {Waltham, Mass.},
  author = {Han, Jiawei and Kamber, Micheline and Pei, Jian},
  interhash = {247a70f1f22ce1914e46d7ff6f43e378},
  intrahash = {beb274b9aeaebb87f5423781b6839f54},
  isbn = {0123814790},
  publisher = {Morgan Kaufmann Publishers},
  refid = {818321921},
  title = {Data mining concepts and techniques, third edition},
  url = {http://www.amazon.de/Data-Mining-Concepts-Techniques-Management/dp/0123814790/ref=tmm_hrd_title_0?ie=UTF8&qid=1366039033&sr=1-1},
  year = 2012
}

@book{han2011mining,
  address = {Amsterdam [u.a.]},
  author = {Han, Jiawei and Kamber, Micheline},
  interhash = {c94d7099d50ba439ee4579de99af285c},
  intrahash = {1cf9e5362e9f34194abad46a4ff9c771},
  isbn = {9780123814791 0123814790},
  publisher = {Elsevier/Morgan Kaufmann},
  refid = {734060711},
  title = {Data mining: concepts and techniques},
  url = {http://www.amazon.de/Data-Mining-Practical-Techniques-Management/dp/0123748569/ref=sr_1_2?ie=UTF8&qid=1366038862&sr=8-2&keywords=Data+mining},
  year = 2011
}

@book{north2012mining,
  author = {North, Matthew},
  interhash = {c2a4d59bda60a400d2f1d45eefe68c93},
  intrahash = {b382402685b65047b826d48260dca7d6},
  isbn = {9780615684376 0615684378},
  refid = {814299849},
  title = {Data mining for the masses},
  url = {http://www.amazon.de/Data-Mining-Masses-Matthew-North/dp/0615684378/ref=sr_1_1?s=books-intl-de&ie=UTF8&qid=1366038800&sr=1-1&keywords=rapidminer},
  year = 2012
}

@inproceedings{conf/birthday/BloehdornBCGHLMMSSV11,
  author = {Bloehdorn, Stephan and Blohm, Sebastian and Cimiano, Philipp and Giesbrecht, Eugenie and Hotho, Andreas and Lösch, Uta and Mädche, Alexander and Mönch, Eddie and Sorg, Philipp and Staab, Steffen and Völker, Johanna},
  booktitle = {Foundations for the Web of Information and Services},
  crossref = {conf/birthday/2011studer},
  editor = {Fensel, Dieter},
  ee = {http://dx.doi.org/10.1007/978-3-642-19797-0_7},
  interhash = {db48314326a36fc4ac8770cba2c20e49},
  intrahash = {21be5153a8f491c9f209d57ce7662387},
  isbn = {978-3-642-19796-3},
  pages = {115--142},
  publisher = {Springer},
  title = {Combining Data-Driven and Semantic Approaches for Text Mining.},
  url = {http://dblp.uni-trier.de/db/conf/birthday/studer2011.html#BloehdornBCGHLMMSSV11},
  year = 2011
}

@proceedings{conf/ht/2010msmmuse,
  booktitle = {MSM/MUSE},
  editor = {Atzmueller, Martin and Hotho, Andreas and Strohmaier, Markus and Chin, Alvin},
  ee = {http://dx.doi.org/10.1007/978-3-642-23599-3},
  interhash = {2be9c4f31fd94e24d902520195b653d3},
  intrahash = {4cf42ebabd9a670c70bee456affda285},
  isbn = {978-3-642-23598-6},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  title = {Analysis of Social Media and Ubiquitous Data - International Workshops MSM 2010, Toronto, Canada, June 13, 2010, and MUSE 2010, Barcelona, Spain, September 20, 2010, Revised Selected Papers},
  url = {http://dblp.uni-trier.de/db/conf/ht/msmmuse2010.html},
  volume = 6904,
  year = 2011
}

@article{Berkhin05asurvey,
  abstract = {Abstract. This survey reviews the research related to PageRank computing. Components of a PageRank vector serve as authority weights for web pages independent of their textual content, solely based on the hyperlink structure of the web. PageRank is typically used as a web search ranking component. This defines the importance of the model and the data structures that underly PageRank processing. Computing even a single PageRank is a difficult computational task. Computing many PageRanks is a much more complex challenge. Recently, significant effort has been invested in building sets of personalized PageRank vectors. PageRank is also used in many diverse applications other than ranking. We are interested in the theoretical foundations of the PageRank formulation, in the acceleration of PageRank computing, in the effects of particular aspects of web graph structure on the optimal organization of computations, and in PageRank stability. We also review alternative models that lead to authority indices similar to PageRank and the role of such indices in applications other than web search. We also discuss linkbased search personalization and outline some aspects of PageRank infrastructure from associated measures of convergence to link preprocessing. 1.},
  author = {Berkhin, Pavel},
  interhash = {a0b85e8e85f88c262934f5fdd05525af},
  intrahash = {50de350b2ae298909eef39a11d0f682c},
  journal = {Internet Mathematics},
  pages = {73--120},
  title = {A Survey on {PageRank} Computing},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.102.2294},
  volume = 2,
  year = 2005
}

@article{Hill:September_2006:1061-8600:584,
  abstract = {A dynamic network is a special type of network composed of connected transactors which have repeated evolving interaction. Data on large dynamic networks such as telecommunications networks and the Internet are pervasive. However, representing dynamic networks in a manner that is conducive to efficient large-scale analysis is a challenge. In this article, we represent dynamic graphs using a data structure introduced in an earlier article. We advocate their representation because it accounts for the evolution of relationships between transactors through time, mitigates noise at the local transactor level, and allows for the removal of stale relationships. Our work improves on their heuristic arguments by formalizing the representation with three tunable parameters. In doing this, we develop a generic framework for evaluating and tuning any dynamic graph. We show that the storage saving approximations involved in the representation do not affect predictive performance, and typically improve it. We motivate our approach using a fraud detection example from the telecommunications industry, and demonstrate that we can outperform published results on the fraud detection task. In addition, we present a preliminary analysis on Web logs and e-mail networks.},
  author = {Hill, Shawndra and Agarwal, Deepak K. and Bell, Robert and Volinsky, Chris},
  doi = {10.1198/106186006X139162},
  interhash = {0bef8e24366140d674636ff4f032a8de},
  intrahash = {c4c90214919c4edb8da5d69b78e5180b},
  journal = {Journal of Computational \& Graphical Statistics},
  month = sep,
  pages = {584--608},
  title = {Building an Effective Representation for Dynamic Networks},
  url = {http://www.ingentaconnect.com/content/asa/jcgs/2006/00000015/00000003/art00006},
  volume = 15,
  year = 2006
}

% The chapter title is the entry title; the containing book is the booktitle.
@incollection{Fayyad:1996:DMK:257938.257942,
  acmid = {257942},
  address = {Menlo Park, CA, USA},
  author = {Fayyad, Usama M. and Piatetsky-Shapiro, Gregory and Smyth, Padhraic},
  booktitle = {Advances in knowledge discovery and data mining},
  editor = {Fayyad, Usama M. and Piatetsky-Shapiro, Gregory and Smyth, Padhraic and Uthurusamy, Ramasamy},
  interhash = {e62d85a492bbc917f43a5d9c8b775189},
  intrahash = {d0b54b224b992e51d892d0f06d45cf6b},
  isbn = {0-262-56097-6},
  numpages = {34},
  pages = {1--34},
  publisher = {American Association for Artificial Intelligence},
  title = {From data mining to knowledge discovery: an overview},
  url = {http://portal.acm.org/citation.cfm?id=257938.257942},
  year = 1996
}

@incollection{springerlink:10.1007/978-3-642-14000-6_4,
  abstract = {Social resource sharing systems are central elements of the Web 2.0 and use all the same kind of lightweight knowledge representation, called folksonomy. As these systems are easy to use, they attract huge masses of users. Data Mining provides methods to analyze data and to learn models which can be used to support users. The application and adaptation of known data mining algorithms to folksonomies with the goal to support the users of such systems and to extract valuable information with a special focus on the Semantic Web is the main target of this paper. In this work we give a short introduction into folksonomies with a focus on our own system BibSonomy. Based on the analysis we made on a large folksonomy dataset, we present the application of data mining algorithms on three different tasks, namely spam detection, ranking and recommendation. To bridge the gap between folksonomies and the Semantic Web, we apply association rule mining to extract relations and present a deeper analysis of statistical measures which can be used to extract tag relations. This approach is complemented by presenting two approaches to extract conceptualizations from folksonomies.},
  address = {Berlin / Heidelberg},
  affiliation = {University of Kassel Knowledge & Data Engineering Group 34121 Kassel Germany},
  author = {Hotho, Andreas},
  booktitle = {Intelligent Information Access},
  doi = {10.1007/978-3-642-14000-6_4},
  editor = {Armano, Giuliano and de Gemmis, Marco and Semeraro, Giovanni and Vargiu, Eloisa},
  interhash = {8995ce2ba8835668820622f8aae908fe},
  intrahash = {1a7906f61b76a87f618e0db657f5c6d9},
  pages = {57--82},
  publisher = {Springer},
  series = {Studies in Computational Intelligence},
  title = {Data Mining on Folksonomies},
  url = {http://dx.doi.org/10.1007/978-3-642-14000-6_4},
  volume = 301,
  year = 2010
}

@article{Berendt201095,
  author = {Berendt, Bettina and Hotho, Andreas and Stumme, Gerd},
  doi = {10.1016/j.websem.2010.04.008},
  interhash = {4969eb2b7bf1fabe60c5f23ab6383d77},
  intrahash = {f8d7bc2af5753906dc3897196daac18c},
  issn = {1570-8268},
  journal = {Web Semantics: Science, Services and Agents on the World Wide Web},
  note = {Bridging the Gap--Data Mining and Social Network Analysis for Integrating Semantic Web and Web 2.0; The Future of Knowledge Dissemination: The Elsevier Grand Challenge for the Life Sciences},
  number = {2-3},
  pages = {95--96},
  title = {Bridging the Gap--Data Mining and Social Network Analysis for Integrating Semantic Web and Web 2.0},
  url = {http://www.sciencedirect.com/science/article/B758F-4YXK4HW-1/2/4cb514565477c54160b5e6eb716c32d7},
  volume = 8,
  year = 2010
}

@inproceedings{dunkels04contiki,
  address = {Tampa, Florida, USA},
  author = {Dunkels, A. and Grönvall, B. and Voigt, T.},
  booktitle = {Workshop on Embedded Networked Sensors},
  interhash = {50be1346437f09bd5f9f4970ce5e058e},
  intrahash = {66a34b97925bdd021fea95fb6b541825},
  month = nov,
  title = {Contiki - a Lightweight and Flexible Operating System for Tiny Networked Sensors},
  year = 2004
}

@article{375731,
  abstract = {A data-integration system provides access to a multitude of data sources through a single mediated schema. A key bottleneck in building such systems has been the laborious manual construction of semantic mappings between the source schemas and the mediated schema. We describe LSD, a system that employs and extends current machine-learning techniques to semi-automatically find such mappings. LSD first asks the user to provide the semantic mappings for a small set of data sources, then uses these mappings together with the sources to train a set of learners. Each learner exploits a different type of information either in the source schemas or in their data. Once the learners have been trained, LSD finds semantic mappings for a new data source by applying the learners, then combining their predictions using a meta-learner. To further improve matching accuracy, we extend machine learning techniques so that LSD can incorporate domain constraints as an additional source of knowledge, and develop a novel learner that utilizes the structural information in XML documents. Our approach thus is distinguished in that it incorporates multiple types of knowledge. Importantly, its architecture is extensible to additional learners that may exploit new kinds of information. We describe a set of experiments on several real-world domains, and show that LSD proposes semantic mappings with a high degree of accuracy.},
  address = {New York, NY, USA},
  author = {Doan, AnHai and Domingos, Pedro and Halevy, Alon Y.},
  doi = {10.1145/376284.375731},
  interhash = {1550f1948858bf8b315ea2fc6ed789cd},
  intrahash = {29e7660361ca79b97b00e5db51fb66ee},
  issn = {0163-5808},
  journal = {SIGMOD Rec.},
  number = 2,
  pages = {509--520},
  publisher = {ACM},
  title = {Reconciling schemas of disparate data sources: a machine-learning approach},
  url = {http://portal.acm.org/citation.cfm?id=375731&dl=GUIDE&coll=GUIDE&CFID=75153142&CFTOKEN=89522229},
  volume = 30,
  year = 2001
}

% The proceedings name belongs in booktitle for a conference paper.
@inproceedings{4597173,
  abstract = {One challenge for relevance ranking in Web search is underspecified queries. For such queries, top-ranked documents may contain information irrelevant to the search goal of the user; some newly-created relevant documents are ranked lower due to their freshness and to the large number of existing documents that match the queries. To improve the relevance ranking for underspecified queries requires better understanding of users' search goals. By analyzing the semantic query context extracted from the query logs, we propose Q-Rank to effectively improve the ranking of search results for a given query. Experiments show that Q-Rank outperforms the current ranking system of a large-scale commercial Web search engine, improving the relevance ranking for 82% of the queries with an average increase of 8.99% in terms of discounted cumulative gains. Because Q-Rank is independent of the underlying ranking algorithm, it can be integrated with existing search engines.},
  author = {Zhuang, Z. and Cucerzan, S.},
  booktitle = {Semantic Computing, 2008 IEEE International Conference on},
  doi = {10.1109/ICSC.2008.8},
  interhash = {fd70fee1920ea227a8c336fe80e2ba71},
  intrahash = {8c2005e1dea667cdd23a8e5c7efe9243},
  month = aug,
  pages = {50--57},
  title = {Exploiting Semantic Query Context to Improve Search Ranking},
  year = 2008
}

@incollection{1420085867,
  asin = {1420085867},
  author = {May, Michael and Berendt, Bettina and Cornuéjols, Antoine and Gama, João and Giannotti, Fosca and Hotho, Andreas and Malerba, Donato and Menesalvas, Ernestina and Morik, Katharina and Pedersen, Rasmus and Saitta, Lorenza and Saygin, Yücel and Schuster, Assaf and Vanhoof, Koen},
  booktitle = {Next Generation of Data Mining (Chapman \& Hall/Crc Data Mining and Knowledge Discovery Series)},
  dewey = {005.74},
  ean = {9781420085860},
  edition = 1,
  interhash = {7aeb3b998b5918d86093e05601e81b4d},
  intrahash = {be3c753af98ab591b4f31d349513b461},
  isbn = {1420085867},
  isbn13 = {9781420085860},
  publisher = {Chapman \& Hall/CRC},
  title = {Research Challenges in Ubiquitous Knowledge Discovery},
  url = {http://208.254.79.11/shopping_cart/products/product_contents.asp?id=&parent_id=497&sku=C5867&isbn=9781420085860&pc=},
  year = 2008
}

@book{Berendt2007,
  abstract = {This book constitutes the refereed proceedings of the Workshop on Web Mining, WebMine 2006, held in Berlin, Germany, September 18th, 2006. Topics included are data mining based on analysis of bloggers and tagging, web mining, XML mining and further techniques of knowledge discovery. The book is especially valuable for those interested in the aspects of the Social Web (Web 2.0) and its inherent dynamic and diversity of user-generated content.},
  date = {September 18, 2006 Series:},
  editor = {Berendt, B. and Hotho, A. and Mladenic, D. and Semeraro, G.},
  interhash = {44843d4fe175e66198b58137dd924f44},
  intrahash = {8aa8d9bcbb5a5bb3fc480d1e53b27236},
  isbn = {978-3-540-74950-9},
  location = {Berlin, Germany},
  publisher = {Springer},
  series = {LNCS},
  title = {From Web to Social Web: Discovering and Deploying User and Content Profiles},
  url = {http://www.springer.com/dal/home?SGWID=1-102-22-173759307-0&changeHeader=true&referer=www.springeronline.com&SHORTCUT=www.springer.com/978-3-540-74950-9},
  vgwort = {279},
  volume = 4736,
  year = 2007
}

@inproceedings{maedche_dawak00,
  author = {Maedche, Alexander and Hotho, Andreas and Wiese, Markus},
  booktitle = {Data Warehousing and Knowledge Discovery, Second International Conference, DaWaK 2000, London, UK},
  interhash = {1e7d8a82c72a243056166e606ddb4fd6},
  intrahash = {0e45a2cba1a2901dbcfe09069cb2695d},
  pages = {258--264},
  publisher = {Springer},
  series = {LNCS},
  title = {Enhancing Preprocessing in Data-Intensive Domains using Online-Analytical Processing},
  volume = 1874,
  year = 2000
}

@book{books/mk/Pyle99,
  author = {Pyle, Dorian},
  date = {2002-01-28},
  interhash = {3edec307e8a02fa778ee847eccfb4215},
  intrahash = {29f6bc4833269393dabf92bae3afa905},
  isbn = {1-55860-529-0},
  publisher = {Morgan Kaufmann},
  title = {Data Preparation for Data Mining},
  year = 1999
}

@article{wikipediaxml:2005,
  author = {Denoyer, Ludovic and Gallinari, Patrick},
  interhash = {0e9b9afb15804d3e625d73ada85900b1},
  intrahash = {493b849942fcaf9ba8e8e68e3cb46d38},
  journal = {SIGIR Forum},
  title = {The {Wikipedia} {XML} {Corpus}},
  url = {http://www-connex.lip6.fr/~denoyer/wikipediaXML/},
  year = 2006
}

@article{han98hypergraph,
  author = {Han, Eui-Hong and Karypis, George and Kumar, Vipin and Mobasher, Bamshad},
  interhash = {3bb7fb3fd3af41fac2db5460a5acfd2c},
  intrahash = {9723b092d975dedb8f6d5f711bb00ffd},
  journal = {Data Engineering Bulletin},
  number = 1,
  pages = {15--22},
  title = {Hypergraph Based Clustering in High-Dimensional Data Sets: A Summary of Results},
  url = {http://citeseer.ist.psu.edu/han98hypergraph.html},
  year = 1998,
  volume = 21
}

% NOTE(review): the author list follows the four surnames in the citation key
% (Newman, Hettich, Blake, Merz); confirm the initials against the source.
@misc{Newman+Hettich+Blake+Merz:1998,
  author = {Newman, D. J. and Hettich, S. and Blake, C. L. and Merz, C. J.},
  institution = {University of California, Irvine, Dept. of Information and Computer Sciences},
  interhash = {325a6b5ac5a1f044943d4298c21d31dd},
  intrahash = {85308db3df761f63f16a7cab4eb8d4aa},
  title = {{UCI} Repository of machine learning databases},
  url = {http://www.ics.uci.edu/~mlearn/MLRepository.html},
  year = 1998
}

@book{citeulike:340715,
  author = {Witten, Ian H. and Frank, Eibe},
  citeulike-article-id = {340715},
  edition = {Second},
  howpublished = {Paperback},
  interhash = {6d7648942e58f322a39a1e8c37edfec6},
  intrahash = {57ade2d873735d4c54d44365dafa7605},
  isbn = {0120884070},
  month = jun,
  priority = {0},
  publisher = {Morgan Kaufmann},
  series = {Morgan Kaufmann Series in Data Management Sys},
  title = {Data Mining: Practical Machine Learning Tools and Techniques},
  url = {http://www.amazon.fr/exec/obidos/ASIN/0120884070/citeulike04-21},
  year = 2005
}

@book{books/mit/FayyadPSU96,
  editor = {Fayyad, Usama M. and Piatetsky-Shapiro, Gregory and Smyth, Padhraic and Uthurusamy, Ramasamy},
  interhash = {c11811ccd720de5dad0ffea4741725f0},
  intrahash = {3553c3acc971c03813352c40afe7476a},
  isbn = {0-262-56097-6},
  publisher = {AAAI/MIT Press},
  title = {Advances in Knowledge Discovery and Data Mining.},
  url = {http://www.amazon.com/gp/product/0262560976},
  year = 1996
}

@article{Laenderetal02,
  author = {Laender, A. H. F. and Ribeiro-Neto, B. A. and da Silva, A. S. and Teixeira, J. S.},
  bb-month = {August 2002},
  interhash = {92cdfc3f27b81b09375ec9524eff07e0},
  intrahash = {eff818caa06f636279e67308aeffe6f6},
  journal = {SIGMOD Record},
  location = {Helsinki, Finland},
  number = 2,
  pages = {84--93},
  title = {A Brief Survey of Web Data Extraction Tools},
  volume = 31,
  year = 2002
}

% A chapter with its own author inside an edited book is an incollection.
@incollection{kleinberg2006temporal,
  author = {Kleinberg, J.},
  booktitle = {Data Stream Management: Processing High-Speed Data Streams},
  editor = {Garofalakis, M. and Gehrke, J. and Rastogi, R.},
  interhash = {85abe180184277c0396745c7ce050c98},
  intrahash = {9c57003d80b81eab2f66b2faf02acb27},
  isbn = {3540286071},
  publisher = {Springer},
  title = {Temporal Dynamics of On-Line Information Streams},
  url = {http://www.cs.cornell.edu/home/kleinber/stream-survey04.pdf},
  year = 2006
}

@article{hotho_jucs01,
  author = {Hotho, A. and Maedche, A. and Staab, S. and Studer, R.},
  interhash = {0f997271cdc6b639ca196fc54b7d277e},
  intrahash = {b9a6e840ccef27311575ed33d40bcc53},
  journal = {Journal of Universal Computer Science (J.UCS)},
  number = 7,
  pages = {566--590},
  title = {{SEAL-II} --- The Soft Spot between Richly Structured and Unstructured Knowledge},
  volume = 7,
  year = 2001
}

@inproceedings{arasugarciamolina03,
  author = {Arasu, Arvind and Garcia-Molina, Hector},
  booktitle = {Proceedings of the 2003 ACM SIGMOD International Conference on Management of Data, San Diego, California, USA, June 9-12, 2003},
  editor = {Halevy, Alon Y. and Ives, Zachary G. and Doan, AnHai},
  interhash = {4d5c1e9aa3fda480e5945c4c0778ba14},
  intrahash = {884b6d51ca600b501cf93a59ba71ccd4},
  isbn = {1-58113-634-X},
  pages = {337--348},
  publisher = {ACM},
  title = {Extracting Structured Data from Web Pages},
  year = 2003
}

% NOTE(review): booktitle holds a \cite to webkdd99book, which is not defined in
% the visible part of this file; switching to crossref needs that parent entry.
@incollection{Borges99,
  author = {Borges, J. L. and Levene, M.},
  booktitle = {\cite{webkdd99book}},
  interhash = {069643ae58ff3572ec20a18a383d3085},
  intrahash = {aea83f9f73682beaa12066e4d28e392b},
  isbn = {3-540-43760-6},
  pages = {92--111},
  title = {Data mining of user navigation patterns},
  year = 2000
}

@article{Cooleyetal99,
  author = {Cooley, R. and Mobasher, B. and Srivastava, J.},
  interhash = {68b1e11110e6498699524008fe67f8c1},
  intrahash = {e515dc2a8adbc7fa84b7fe968b61391e},
  journal = {Journal of Knowledge and Information Systems},
  location = {Santa Barbara, CA},
  number = 1,
  pages = {5--32},
  title = {Data preparation for mining world wide web browsing patterns},
  volume = 1,
  year = 1999
}

@article{Srivastavaetal,
  author = {Srivastava, J. and Cooley, R. and Deshpande, M. and Tan, P.-N.},
  interhash = {08571943908ec1aa9aa5c003e79d5b8d},
  intrahash = {dc941da0f5c7da937269241b0df0b3b3},
  journal = {SIGKDD Explorations},
  location = {Santa Barbara, CA},
  number = 2,
  pages = {12--23},
  title = {Web usage mining: discovery and application of usage patterns from web data},
  url = {http://citeseer.nj.nec.com/srivastava00web.html},
  volume = 1,
  year = 2000
}

% NOTE(review): booktitle holds a \cite to webkdd99book, which is not defined in
% the visible part of this file; switching to crossref needs that parent entry.
@incollection{Cooleyetal00,
  author = {Cooley, R. and Tan, P.-N. and Srivastava, J.},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  booktitle = {\cite{webkdd99book}},
  interhash = {4cb524eff321348696985af1d63a3e59},
  intrahash = {5862558933e364680abc29bd5b4e0d84},
  pages = {163--182},
  title = {Discovery of interesting usage patterns from web data},
  year = 2000
}

@book{HanKamber01,
  address = {San Francisco, CA},
  author = {Han, Jiawei and Kamber, Micheline},
  interhash = {f902c324cdc1b270bdf9d996ba19dca7},
  intrahash = {b9884cf23229d6cb71535484424be1ff},
  location = {Helsinki, Finland},
  publisher = {Morgan Kaufmann},
  title = {Data Mining. Concepts and Techniques},
  year = 2001
}

@inproceedings{gottlobetal2004,
  author = {Gottlob, Georg and Koch, Christoph and Baumgartner, Robert and Herzog, Marcus and Flesca, Sergio},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  booktitle = {Proceedings of the Twenty-third ACM SIGACT-SIGMOD-SIGART Symposium on Principles of Database Systems, June 14-16, 2004, Paris, France},
  interhash = {bc4c0bca239d365f573ca3b542ec36ca},
  intrahash = {1cc97508ecfcb6c56e201df3065e708b},
  isbn = {1-58113-858-X},
  pages = {1--12},
  publisher = {ACM},
  title = {The Lixto Data Extraction Project - Back and Forth between Theory and Practice},
  year = 2004
}

@book{BerryLinoffWebMining,
  address = {New York},
  author = {Linoff, G. S. and Berry, M. J. A.},
  interhash = {da5f8a53ac15a7efdcefceb04adbd5af},
  intrahash = {0b23e0a44cf267b346c67240a86b308a},
  isbn = {90-74821-43-X},
  publisher = {John Wiley \& Sons},
  title = {Mining the Web. Transforming Customer Data into Customer Value},
  year = 2001
}

@article{ZaianeSimoff03,
  author = {Zaïane, O. R. and Simoff, S. J.},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  interhash = {38ce81529e3b7527655b6fa0049926ea},
  intrahash = {7a6bf1f7aed80780a5b73db192d3bed5},
  journal = {SIGKDD Explorations},
  number = 2,
  publisher = {ACM},
  title = {MDM/KDD: Multimedia data mining for the second time},
  volume = 3,
  year = 2003
}

% The paper's web address is stored in url (it is not a publisher address).
@inproceedings{Simoff00,
  author = {Simoff, S. J.},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  booktitle = {Proceedings of the MDM/KDD2000 Workshop on Multimedia Data Mining},
  editor = {Simoff, S. J. and Zaïane, O. R.},
  interhash = {fcabb55db4171c241c2fa33bec8d3998},
  intrahash = {f35304fac130bf0acf880d7141c65e7e},
  pages = {104--109},
  title = {Variations on multimedia data mining},
  url = {http://www.cs.ualberta.ca/~zaiane/mdm_kdd2000/mdm00-15.pdf},
  year = 2000
}

@book{Handetal01,
  address = {Cambridge, MA},
  author = {Hand, D. and Mannila, H. and Smyth, P.},
  interhash = {1d7f5d27a1dfea47e1ac543fcb027c0d},
  intrahash = {e2e195a4102d09f2ed4976bf102af01d},
  location = {Santa Barbara, CA},
  publisher = {MIT Press},
  title = {Principles of Data Mining},
  year = 2001
}

@article{Chakrabarti2000,
  author = {Chakrabarti, S.},
  interhash = {e54787477738b367fad5aa57523a2cfd},
  intrahash = {31d8cbe4de9c172bf3284a29b0cf57bf},
  journal = {SIGKDD Explorations},
  location = {Santa Barbara, CA},
  number = 2,
  pages = {1--11},
  publisher = {ACM},
  title = {Data mining for hypertext: A tutorial survey},
  url = {http://citeseer.nj.nec.com/304115.html},
  volume = 1,
  year = 2000
}

@article{S99,
  author = {Spiliopoulou, M.},
  interhash = {c14dde2b223d83bff172295b3fdcd05d},
  intrahash = {e3d14f752ca6808d9fd43406bef22e6c},
  journal = {International Journal of Computer Systems, Science, \& Engineering},
  location = {Denver, CO},
  pages = {113--126},
  title = {The laborious way from data mining to Web mining},
  volume = 14,
  year = 1999
}

@book{KimballMerx00,
  address = {New York},
  author = {Kimball, R. and Merx, R.},
  bb-month = {August 2002},
  interhash = {1e89ccb1dededc94cbb215fcda3bbcea},
  intrahash = {3f34eebc09aa1b252423da1ab0792cca},
  location = {Helsinki, Finland},
  publisher = {Wiley Computer Publishing},
  title = {The Data Webhouse Toolkit -- Building Web-Enabled Data Warehouse},
  year = 2000
}

% NOTE(review): booktitle holds a \cite to KDD2002, which is not defined in the
% visible part of this file; switching to crossref needs that parent entry.
@inproceedings{Joachims02,
  author = {Joachims, T.},
  booktitle = {\cite{KDD2002}},
  interhash = {c78df69370bbf12636eaa5233b1fba83},
  intrahash = {bee23197127e6b1dfb7a34bab035eee1},
  location = {Helsinki, Finland},
  pages = {133--142},
  title = {Optimizing Search Engines using Clickthrough Data},
  year = 2002
}