@proceedings{cellier2014proceedings,
  editor    = {Cellier, Peggy and Charnois, Thierry and Hotho, Andreas and Matwin, Stan and Moens, Marie{-}Francine and Toussaint, Yannick},
  title     = {Proceedings of the 1st International Workshop on Interactions between
               Data Mining and Natural Language Processing co-located with The European
               Conference on Machine Learning and Principles and Practice of Knowledge
               Discovery in Databases, DMNLP@PKDD/ECML 2014, Nancy, France, September
               15, 2014},
  series    = {{CEUR} Workshop Proceedings},
  volume    = {1202},
  publisher = {CEUR-WS.org},
  year      = {2014},
  url       = {http://ceur-ws.org/Vol-1202},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  interhash = {212d282598a034c37510c1c08c4f3a34},
  intrahash = {cfb7265080d484cfda32e1fbdaff361f}
}

@article{atzmueller2014ubicon,
  abstract = {The combination of ubiquitous and social computing is an emerging
 research area which integrates different but complementary methods,
 techniques and tools. In this paper, we focus on the Ubicon platform,
 its applications, and a large spectrum of analysis results.
 
 
 Ubicon provides an extensible framework for building and hosting applications
 targeting both ubiquitous and social environments. We summarize the
 architecture and exemplify its implementation using four real-world
 applications built on top of Ubicon. In addition, we discuss several
 scientific experiments in the context of these applications in order
 to give a better picture of the potential of the framework, and discuss
 analysis results using several real-world data sets collected utilizing
 Ubicon.},
  author = {Atzmueller, Martin and Becker, Martin and Kibanov, Mark and Scholz, Christoph and Doerfel, Stephan and Hotho, Andreas and Macek, Bjoern-Elmar and Mitzlaff, Folke and Mueller, Juergen and Stumme, Gerd},
  doi = {10.1080/13614568.2013.873488},
  interhash = {6364e034fa868644b30618dc887c0270},
  intrahash = {176e4f2816af5fe1630ed65e062900ce},
  journal = {New Review of Hypermedia and Multimedia},
  number = 1,
  pages = {53--77},
  title = {{Ubicon} and its Applications for Ubiquitous Social Computing},
  url = {http://www.tandfonline.com/doi/abs/10.1080/13614568.2013.873488},
  volume = 20,
  year = 2014
}

@inproceedings{MASH:13b,
  address = {Bamberg, Germany},
  author = {Mitzlaff, Folke and Atzmueller, Martin and Stumme, Gerd and Hotho, Andreas},
  booktitle = {Proc. LWA 2013 (KDML Special Track)},
  interhash = {73088600a500f7d06768615d6e1c2b3d},
  intrahash = {820ffb2166b330bf60bb30b16e426553},
  publisher = {University of Bamberg},
  title = {On the Semantics of User Interaction in Social Media (Extended Abstract, Resubmission)},
  year = 2013
}

@article{hotho2010ubiquitous,
  author = {Hotho, Andreas and {Ulslev Pedersen}, Rasmus and Wurst, Michael},
  doi = {10.1007/978-3-642-16392-0_4},
  interhash = {e779fb5dff41b65bce1aa38fdca4a376},
  intrahash = {56f2940d5d0f2ce59c342d3b8ad42ca1},
  issn = {0302-9743},
  journal = {Lecture Notes in Computer Science},
  pages = {61--74},
  publisher = {Springer},
  title = {Ubiquitous Data},
  url = {http://rd.springer.com/content/pdf/10.1007%2F978-3-642-16392-0_4.pdf},
  volume = 6202,
  year = 2010
}

@book{han2012mining,
  address = {Waltham, Mass.},
  author = {Han, Jiawei and Kamber, Micheline and Pei, Jian},
  edition = {Third},
  interhash = {247a70f1f22ce1914e46d7ff6f43e378},
  intrahash = {beb274b9aeaebb87f5423781b6839f54},
  isbn = {0123814790},
  publisher = {Morgan Kaufmann Publishers},
  refid = {818321921},
  title = {Data Mining: Concepts and Techniques},
  url = {http://www.amazon.de/Data-Mining-Concepts-Techniques-Management/dp/0123814790/ref=tmm_hrd_title_0?ie=UTF8&qid=1366039033&sr=1-1},
  year = 2012
}

@book{han2011mining,
  address = {Amsterdam [u.a.]},
  author = {Han, Jiawei and Kamber, Micheline},
  interhash = {c94d7099d50ba439ee4579de99af285c},
  intrahash = {1cf9e5362e9f34194abad46a4ff9c771},
  isbn = {0123814790},
  isbn13 = {9780123814791},
  publisher = {Elsevier/Morgan Kaufmann},
  refid = {734060711},
  title = {Data Mining: Concepts and Techniques},
  url = {http://www.amazon.de/Data-Mining-Concepts-Techniques-Management/dp/0123814790/ref=tmm_hrd_title_0?ie=UTF8&qid=1366039033&sr=1-1},
  year = 2011
}

@book{north2012mining,
  author = {North, Matthew},
  interhash = {c2a4d59bda60a400d2f1d45eefe68c93},
  intrahash = {b382402685b65047b826d48260dca7d6},
  isbn = {0615684378},
  isbn13 = {9780615684376},
  refid = {814299849},
  title = {Data Mining for the Masses},
  url = {http://www.amazon.de/Data-Mining-Masses-Matthew-North/dp/0615684378/ref=sr_1_1?s=books-intl-de&ie=UTF8&qid=1366038800&sr=1-1&keywords=rapidminer},
  year = 2012
}

@proceedings{AH:12,
  address = {Bristol, UK},
  editor = {Atzmueller, Martin and Hotho, Andreas},
  interhash = {ad11fdb5a32814dc9c75b4483dccd6a7},
  intrahash = {5304ffe0848eccd3938b50c811d2556d},
  publisher = {Workshop Notes},
  title = {Proceedings of the Third International Workshop on Mining Ubiquitous and Social Environments ({MUSE} 2012)},
  url = {http://www.kde.cs.uni-kassel.de/ws/muse2012/proceedings.pdf},
  year = 2012
}

@book{ACHH:12,
  editor    = {Atzmueller, Martin and Chin, Alvin and Helic, Denis and Hotho, Andreas},
  title     = {Modeling and Mining Ubiquitous Social Media},
  series    = {Lecture Notes in Computer Science},
  volume    = {7472},
  publisher = {Springer Verlag},
  address   = {Heidelberg, Germany},
  year      = {2012},
  url       = {http://www.springer.com/computer/ai/book/978-3-642-33683-6},
  interhash = {ebf8e8b66c6c0723092e11e40998d61f},
  intrahash = {a0e5d144b39199fa4acb6319f29e7a15}
}

@inproceedings{conf/birthday/BloehdornBCGHLMMSSV11,
  author = {Bloehdorn, Stephan and Blohm, Sebastian and Cimiano, Philipp and Giesbrecht, Eugenie and Hotho, Andreas and Lösch, Uta and Mädche, Alexander and Mönch, Eddie and Sorg, Philipp and Staab, Steffen and Völker, Johanna},
  booktitle = {Foundations for the Web of Information and Services},
  crossref = {conf/birthday/2011studer},
  doi = {10.1007/978-3-642-19797-0_7},
  editor = {Fensel, Dieter},
  ee = {http://dx.doi.org/10.1007/978-3-642-19797-0_7},
  interhash = {db48314326a36fc4ac8770cba2c20e49},
  intrahash = {21be5153a8f491c9f209d57ce7662387},
  isbn = {978-3-642-19796-3},
  pages = {115--142},
  publisher = {Springer},
  title = {Combining Data-Driven and Semantic Approaches for Text Mining},
  url = {http://dblp.uni-trier.de/db/conf/birthday/studer2011.html#BloehdornBCGHLMMSSV11},
  year = 2011
}

@inproceedings{conf/birthday/HothoS11,
  author = {Hotho, Andreas and Stumme, Gerd},
  booktitle = {Foundations for the Web of Information and Services},
  crossref = {conf/birthday/2011studer},
  doi = {10.1007/978-3-642-19797-0_8},
  editor = {Fensel, Dieter},
  ee = {http://dx.doi.org/10.1007/978-3-642-19797-0_8},
  interhash = {502dc9bea95f0c581a37cd39cae2ff09},
  intrahash = {845a2a6bf9a43be9e85741a6c7d2aa2d},
  isbn = {978-3-642-19796-3},
  pages = {143--153},
  publisher = {Springer},
  title = {From Semantic Web Mining to Social and Ubiquitous Mining - A Subjective View on Past, Current, and Future Research},
  url = {http://dblp.uni-trier.de/db/conf/birthday/studer2011.html#HothoS11},
  year = 2011
}

@proceedings{AH:11,
  address = {Athens, Greece},
  editor = {Atzmueller, Martin and Hotho, Andreas},
  interhash = {8215abfd0550872bdd023c532f21e257},
  intrahash = {87b0576f542aa6de352a2bff1428ce6b},
  publisher = {ECML/PKDD 2011},
  title = {Proceedings of the 2011 International Workshop on Mining Ubiquitous and Social Environments ({MUSE} 2011)},
  year = 2011
}

@inproceedings{mitzlaff2011community,
  author = {Mitzlaff, Folke and Atzmueller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  booktitle = {Analysis of Social Media and Ubiquitous Data},
  interhash = {1ef065a81ed836dfd31fcc4cd4da133b},
  intrahash = {0f45e870093c053e6f41f54c14bda46b},
  series = {LNAI},
  title = {Community Assessment using Evidence Networks},
  volume = 6904,
  year = 2011
}

@inproceedings{atzmueller2011towards,
  author    = {Atzmueller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  title     = {Towards Mining Semantic Maturity in Social Bookmarking Systems},
  booktitle = {Proceedings of the 4th international workshop on Social Data on the Web (SDoW2011)},
  editor    = {Passant, Alexandre and Fernández, Sergio and Breslin, John and Bojārs, Uldis},
  year      = {2011},
  url       = {http://www.kde.cs.uni-kassel.de/pub/pdf/atzmueller2011towards.pdf},
  interhash = {65222f0ccc23063a2a15c0a7fd5513a0},
  intrahash = {a47a41658592202811f0139d4bb65871}
}

@inproceedings{conf/icdm/YassineH10,
  author = {Yassine, Mohamed and Hajj, Hazem},
  booktitle = {ICDM Workshops},
  crossref = {conf/icdm/2010w},
  doi = {10.1109/ICDMW.2010.75},
  editor = {Fan, Wei and Hsu, Wynne and Webb, Geoffrey I. and Liu, Bing and Zhang, Chengqi and Gunopulos, Dimitrios and Wu, Xindong},
  ee = {http://dx.doi.org/10.1109/ICDMW.2010.75},
  interhash = {72ae8c258d6559e4a90370453ecc2acc},
  intrahash = {8b0afeee143cec94f3058c214ae38c6f},
  pages = {1136--1142},
  publisher = {IEEE Computer Society},
  title = {A Framework for Emotion Mining from Text in Online Social Networks},
  url = {http://dblp.uni-trier.de/db/conf/icdm/icdmw2010.html#YassineH10},
  year = 2010
}

@misc{Rubin2011,
  abstract = {Machine learning approaches to multi-label document classification have (to date) largely relied on discriminative modeling techniques such as support vector machines. A drawback of these approaches is that performance rapidly drops off as the total number of labels and the number of labels per document increase. This problem is amplified when the label frequencies exhibit the type of highly skewed distributions that are often observed in real-world datasets. In this paper we investigate a class of generative statistical topic models for multi-label documents that associate individual word tokens with different labels. We investigate the advantages of this approach relative to discriminative models, particularly with respect to classification problems involving large numbers of relatively rare labels. We compare the performance of generative and discriminative approaches on document labeling tasks ranging from datasets with several thousand labels to datasets with tens of labels. The experimental results indicate that generative models can achieve competitive multi-label classification performance compared to discriminative methods, and have advantages for datasets with many labels and skewed label frequencies.},
  archiveprefix = {arXiv},
  author = {Rubin, Timothy N. and Chambers, America and Smyth, Padhraic and Steyvers, Mark},
  eprint = {1107.2462},
  interhash = {e09d5d8587756d460a5d834025e75aac},
  intrahash = {f8a5a3958ae264d19c7f5415eb7f0bce},
  title = {Statistical Topic Models for Multi-Label Document Classification},
  url = {http://arxiv.org/abs/1107.2462},
  year = 2011
}

@book{srivastava2009mining,
  abstract = {Giving a broad perspective of the field from numerous vantage points, 'Text Mining' focuses on statistical methods for text mining and analysis. It examines methods to automatically cluster and classify text documents and applies these methods in a variety of areas.},
  address = {Boca Raton, FL},
  author = {Srivastava, Ashok and Sahami, Mehran},
  interhash = {290eabe518274b6fbcc73a106a7d52a6},
  intrahash = {45ab79501c114299142864becfa6c841},
  isbn = {1420059408},
  isbn13 = {9781420059403},
  publisher = {CRC Press},
  refid = {144226505},
  title = {Text Mining: Classification, Clustering, and Applications},
  url = {http://www.worldcat.org/search?qt=worldcat_org_all&q=9781420059403},
  year = 2009
}

@article{march06crane,
  author = {Crane, Gregory},
  doi = {10.1045/march2006-crane},
  interhash = {36d4825e3189d89195693d1449e9aaea},
  intrahash = {eea7ae2ac1480c84f87544f2942c28f2},
  issn = {1082-9873},
  journal = {D-Lib Magazine},
  month = mar,
  number = 3,
  title = {What Do You Do with a Million Books?},
  url = {http://www.dlib.org/dlib/march06/crane/03crane.html},
  volume = 12,
  year = 2006
}

@article{Berkhin05asurvey,
  abstract = {This survey reviews the research related to PageRank computing. Components of a PageRank vector serve as authority weights for web pages independent of their textual content, solely based on the hyperlink structure of the web. PageRank is typically used as a web search ranking component. This defines the importance of the model and the data structures that underly PageRank processing. Computing even a single PageRank is a difficult computational task. Computing many PageRanks is a much more complex challenge. Recently, significant effort has been invested in building sets of personalized PageRank vectors. PageRank is also used in many diverse applications other than ranking. We are interested in the theoretical foundations of the PageRank formulation, in the acceleration of PageRank computing, in the effects of particular aspects of web graph structure on the optimal organization of computations, and in PageRank stability. We also review alternative models that lead to authority indices similar to PageRank and the role of such indices in applications other than web search. We also discuss link-based search personalization and outline some aspects of PageRank infrastructure from associated measures of convergence to link preprocessing.},
  author = {Berkhin, Pavel},
  interhash = {a0b85e8e85f88c262934f5fdd05525af},
  intrahash = {50de350b2ae298909eef39a11d0f682c},
  journal = {Internet Mathematics},
  pages = {73--120},
  title = {A Survey on {PageRank} Computing},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.102.2294},
  volume = 2,
  year = 2005
}

@incollection{Fayyad:1996:DMK:257938.257942,
  acmid = {257942},
  address = {Menlo Park, CA, USA},
  author = {Fayyad, Usama M. and Piatetsky-Shapiro, Gregory and Smyth, Padhraic},
  booktitle = {Advances in Knowledge Discovery and Data Mining},
  editor = {Fayyad, Usama M. and Piatetsky-Shapiro, Gregory and Smyth, Padhraic and Uthurusamy, Ramasamy},
  interhash = {e62d85a492bbc917f43a5d9c8b775189},
  intrahash = {d0b54b224b992e51d892d0f06d45cf6b},
  isbn = {0-262-56097-6},
  numpages = {34},
  pages = {1--34},
  publisher = {American Association for Artificial Intelligence},
  title = {From Data Mining to Knowledge Discovery: An Overview},
  url = {http://portal.acm.org/citation.cfm?id=257938.257942},
  year = 1996
}

@book{feldman2006mining,
  author    = {Feldman, Ronen and Sanger, James},
  title     = {The Text Mining Handbook: Advanced Approaches in Analyzing Unstructured Data},
  publisher = {Cambridge University Press},
  year      = {2007},
  isbn      = {0521836573},
  asin      = {0521836573},
  ean       = {9780521836579},
  dewey     = {005.74},
  url       = {http://www.amazon.com/Text-Mining-Handbook-Approaches-Unstructured/dp/0521836573/ref=sr_1_1?s=books&ie=UTF8&qid=1295265273&sr=1-1},
  interhash = {14cb9e63c6dca7830675c73578dcc30b},
  intrahash = {a310b253098a92c9f6352f568c1a9c37}
}

@incollection{springerlink:10.1007/978-3-642-14000-6_4,
  abstract = {Social resource sharing systems are central elements of the Web 2.0 and use all the same kind of lightweight knowledge representation, called folksonomy. As these systems are easy to use, they attract huge masses of users. Data Mining provides methods to analyze data and to learn models which can be used to support users. The application and adaptation of known data mining algorithms to folksonomies with the goal to support the users of such systems and to extract valuable information with a special focus on the Semantic Web is the main target of this paper.   In this work we give a short introduction into folksonomies with a focus on our own system BibSonomy. Based on the analysis we made on a large folksonomy dataset, we present the application of data mining algorithms on three different tasks, namely spam detection, ranking and recommendation. To bridge the gap between folksonomies and the Semantic Web, we apply association rule mining to extract relations and present a deeper analysis of statistical measures which can be used to extract tag relations. This approach is complemented by presenting two approaches to extract conceptualizations from folksonomies.},
  address = {Berlin / Heidelberg},
  affiliation = {University of Kassel Knowledge \& Data Engineering Group 34121 Kassel Germany},
  author = {Hotho, Andreas},
  booktitle = {Intelligent Information Access},
  doi = {10.1007/978-3-642-14000-6_4},
  editor = {Armano, Giuliano and de Gemmis, Marco and Semeraro, Giovanni and Vargiu, Eloisa},
  interhash = {8995ce2ba8835668820622f8aae908fe},
  intrahash = {1a7906f61b76a87f618e0db657f5c6d9},
  pages = {57--82},
  publisher = {Springer},
  series = {Studies in Computational Intelligence},
  title = {Data Mining on Folksonomies},
  url = {http://dx.doi.org/10.1007/978-3-642-14000-6_4},
  volume = 301,
  year = 2010
}

@article{Berendt201095,
  author = {Berendt, Bettina and Hotho, Andreas and Stumme, Gerd},
  doi = {10.1016/j.websem.2010.04.008},
  interhash = {4969eb2b7bf1fabe60c5f23ab6383d77},
  intrahash = {f8d7bc2af5753906dc3897196daac18c},
  issn = {1570-8268},
  journal = {Web Semantics: Science, Services and Agents on the World Wide Web},
  note = {Bridging the Gap--Data Mining and Social Network Analysis for Integrating Semantic Web and Web 2.0; The Future of Knowledge Dissemination: The Elsevier Grand Challenge for the Life Sciences},
  number = {2--3},
  pages = {95--96},
  title = {Bridging the Gap--Data Mining and Social Network Analysis for Integrating Semantic Web and Web 2.0},
  url = {http://www.sciencedirect.com/science/article/B758F-4YXK4HW-1/2/4cb514565477c54160b5e6eb716c32d7},
  volume = 8,
  year = 2010
}

@article{375731,
  abstract = {A data-integration system provides access to a multitude of data sources through a single mediated schema. A key bottleneck in building such systems has been the laborious manual construction of semantic mappings between the source schemas and the mediated schema. We describe LSD, a system that employs and extends current machine-learning techniques to semi-automatically find such mappings. LSD first asks the user to provide the semantic mappings for a small set of data sources, then uses these mappings together with the sources to train a set of learners. Each learner exploits a different type of information either in the source schemas or in their data. Once the learners have been trained, LSD finds semantic mappings for a new data source by applying the learners, then combining their predictions using a meta-learner. To further improve matching accuracy, we extend machine learning techniques so that LSD can incorporate domain constraints as an additional source of knowledge, and develop a novel learner that utilizes the structural information in XML documents. Our approach thus is distinguished in that it incorporates multiple types of knowledge. Importantly, its architecture is extensible to additional learners that may exploit new kinds of information. We describe a set of experiments on several real-world domains, and show that LSD proposes semantic mappings with a high degree of accuracy.},
  address = {New York, NY, USA},
  author = {Doan, AnHai and Domingos, Pedro and Halevy, Alon Y.},
  doi = {10.1145/376284.375731},
  interhash = {1550f1948858bf8b315ea2fc6ed789cd},
  intrahash = {29e7660361ca79b97b00e5db51fb66ee},
  issn = {0163-5808},
  journal = {SIGMOD Rec.},
  number = 2,
  pages = {509--520},
  publisher = {ACM},
  title = {Reconciling schemas of disparate data sources: a machine-learning approach},
  url = {http://portal.acm.org/citation.cfm?id=375731&dl=GUIDE&coll=GUIDE&CFID=75153142&CFTOKEN=89522229},
  volume = 30,
  year = 2001
}

@article{kosala00web,
  author    = {Kosala, R. and Blockeel, H.},
  title     = {Web Mining Research: {A} Survey},
  journal   = {SIGKDD Explorations},
  volume    = {2},
  number    = {1},
  pages     = {1--15},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2000},
  url       = {http://citeseer.nj.nec.com/kosala00web.html},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  interhash = {99eea914954da48c9691277ce4e32932},
  intrahash = {59f6ef686827c7095cc89ebdb056a222}
}

@inproceedings{anti2008krause,
  address = {New York, NY, USA},
  author = {Krause, Beate and Schmitz, Christoph and Hotho, Andreas and Stumme, Gerd},
  booktitle = {AIRWeb '08: Proceedings of the 4th international workshop on Adversarial information retrieval on the web},
  doi = {10.1145/1451983.1451998},
  interhash = {a45d40ac7776551301ad9dde5b25357f},
  intrahash = {68effe5d4b9460f9388e7685310f74c2},
  isbn = {978-1-60558-159-0},
  location = {Beijing, China},
  pages = {61--68},
  publisher = {ACM},
  title = {The Anti-Social Tagger - Detecting Spam in Social Bookmarking Systems},
  url = {http://airweb.cse.lehigh.edu/2008/submissions/krause_2008_anti_social_tagger.pdf},
  year = 2008
}

@inproceedings{agrawal93,
  author    = {Agrawal, Rakesh and Imielinski, Tomasz and Swami, Arun},
  title     = {Mining association rules between sets of items in large databases},
  booktitle = {SIGMOD '93: Proceedings of the 1993 ACM SIGMOD international conference on Management of data},
  pages     = {207--216},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  year      = {1993},
  interhash = {53341ce3e6ce51c3bcf8b0219ec239b5},
  intrahash = {ca35e602124130b480592b3a55267006}
}

@book{ferber2003information,
  author    = {Ferber, Reginald},
  title     = {Information Retrieval: Suchmodelle und Data-Mining-Verfahren für Textsammlungen und das Web},
  publisher = {dpunkt Verlag},
  address   = {Heidelberg},
  year      = {2003},
  url       = {http://information-retrieval.de/},
  interhash = {52c1b4ab3e818efef6635eb76b778608},
  intrahash = {b60dbc902a2e19877aec154fa5747751}
}

@book{UBMA_280507895,
  author    = {Heyer, Gerhard and Quasthoff, Uwe and Wittig, Thomas},
  title     = {Text Mining: Wissensrohstoff Text},
  edition   = {1. korr. Nachdr.},
  series    = {IT lernen},
  publisher = {W3L-Verl.},
  address   = {Herdecke ; Bochum},
  year      = {2008},
  pages     = {XII, 348 S.},
  isbn      = {978-3-937137-30-8},
  url       = {http://aleph.bib.uni-mannheim.de/F/?func=find-b&request=280507895&find_code=020&adjacent=N&local_base=MAN01PUBLIC&x=0&y=0},
  interhash = {d6fa152f7becd0a9d5155f748c29ac22},
  intrahash = {692999b8760981d3b2e0b9103b9d3b0f}
}

@article{Pang.Lee2008OpinionMiningand,
  abstract = {An important part of our information-gathering behavior has always
been to find out what other people think. With the growing availability
and popularity of opinion-rich resources such as online review sites
and personal blogs, new opportunities and challenges arise as people
now can, and do, actively use information technologies to seek out and
understand the opinions of others. The sudden eruption of activity in
the area of opinion mining and sentiment analysis, which deals with
the computational treatment of opinion, sentiment, and subjectivity
in text, has thus occurred at least in part as a direct response to the
surge of interest in new systems that deal directly with opinions as a
first-class object.
This survey covers techniques and approaches that promise to
directly enable opinion-oriented information-seeking systems. Our
focus is on methods that seek to address the new challenges raised by
sentiment-aware applications, as compared to those that are already
present in more traditional fact-based analysis. We include material
on summarization of evaluative text and on broader issues regarding
privacy, manipulation, and economic impact that the development of
opinion-oriented information-access services gives rise to. To facilitate
future work, a discussion of available resources, benchmark datasets,
and evaluation campaigns is also provided.},
  author = {Pang, B. and Lee, L.},
  doi = {10.1561/1500000001},
  interhash = {7bfd8b20ea5f9fb76e96d71c3155c50c},
  intrahash = {60ec6588322693f7636f8cd3bc820783},
  jlprojects = {cyberemotions},
  journal = {Foundations and Trends in Information Retrieval},
  number = {1--2},
  pages = {1--135},
  title = {Opinion Mining and Sentiment Analysis},
  volume = 2,
  year = 2008
}

@incollection{1420085867,
  asin = {1420085867},
  author = {May, Michael and Berendt, Bettina and Cornuéjols, Antoine and Gama, João and Giannotti, Fosca and Hotho, Andreas and Malerba, Donato and Menasalvas, Ernestina and Morik, Katharina and Pedersen, Rasmus and Saitta, Lorenza and Saygin, Yücel and Schuster, Assaf and Vanhoof, Koen},
  booktitle = {Next Generation of Data Mining (Chapman \& Hall/Crc Data Mining and Knowledge Discovery Series)},
  dewey = {005.74},
  ean = {9781420085860},
  edition = 1,
  interhash = {7aeb3b998b5918d86093e05601e81b4d},
  intrahash = {be3c753af98ab591b4f31d349513b461},
  isbn = {1420085867},
  isbn13 = {9781420085860},
  publisher = {Chapman \& Hall/CRC},
  title = {Research Challenges in Ubiquitous Knowledge Discovery},
  url = {http://208.254.79.11/shopping_cart/products/product_contents.asp?id=&parent_id=497&sku=C5867&isbn=9781420085860&pc=},
  year = 2008
}

@misc{Medelyan2008,
  abstract = {Wikipedia is a goldmine of information; not just for its many readers, but
also for the growing community of researchers who recognize it as a resource of
exceptional scale and utility. It represents a vast investment of manual effort
and judgment: a huge, constantly evolving tapestry of concepts and relations
that is being applied to a host of tasks.
  This article provides a comprehensive description of this work. It focuses on
research that extracts and makes use of the concepts, relations, facts and
descriptions found in Wikipedia, and organizes the work into four broad
categories: applying Wikipedia to natural language processing; using it to
facilitate information retrieval and information extraction; and as a resource
for ontology building. The article addresses how Wikipedia is being used as is,
how it is being improved and adapted, and how it is being combined with other
structures to create entirely new resources. We identify the research groups
and individuals involved, and how their work has developed in the last few
years. We provide a comprehensive list of the open-source software they have
produced. We also discuss the implications of this work for the long-awaited
semantic web.},
  archiveprefix = {arXiv},
  author = {Medelyan, Olena and Legg, Catherine and Milne, David and Witten, Ian H.},
  eprint = {0809.4530},
  interhash = {6614c7cd27d80abd691b2ef463941d1c},
  intrahash = {0e7499a4f087f74ad0be674047cf315d},
  note = {An extensive survey of re-using information in Wikipedia in natural
  language processing, information retrieval and extraction and ontology
  building. Submitted},
  title = {Mining Meaning from Wikipedia},
  url = {http://arxiv.org/abs/0809.4530},
  year = 2008
}

@article{1324190,
  address = {New York, NY, USA},
  author = {Stavrianou, Anna and Andritsos, Periklis and Nicoloyannis, Nicolas},
  doi = {10.1145/1324185.1324190},
  interhash = {bde58d2eeb65f2194171f93b0e1f2a21},
  intrahash = {d8c54095392c0e83ab4f50f694d3b1f3},
  issn = {0163-5808},
  journal = {SIGMOD Rec.},
  number = 3,
  pages = {23--34},
  publisher = {ACM},
  title = {Overview and semantic issues of text mining},
  url = {http://portal.acm.org/citation.cfm?id=1324190},
  volume = 36,
  year = 2007
}

@article{Pang2008,
  author = {Pang, Bo and Lee, Lillian},
  interhash = {7bfd8b20ea5f9fb76e96d71c3155c50c},
  intrahash = {236d4f703fda3dd9457863f28eda56cb},
  isbn = {978-1-60198-150-9},
  journal = {Foundations and Trends® in Information Retrieval},
  month = jul,
  number = {1--2},
  pages = {1--135},
  publisher = {Now publishers},
  title = {Opinion Mining and Sentiment Analysis},
  url = {http://www.cs.cornell.edu/home/llee/omsa/omsa-published.pdf},
  volume = 2,
  year = 2008
}

@inproceedings{Chakrabarti:2004,
  author = {Chakrabarti, D. and Zhan, Y. and Faloutsos, C.},
  booktitle = {SIAM International Conference on Data Mining},
  interhash = {d7719c6e919fbb8a37e09464f12988b6},
  intrahash = {5e5cc221d7da719909f3bf8c507b0afc},
  title = {{R-MAT}: A Recursive Model for Graph Mining},
  url = {http://www.cs.cmu.edu/~christos/PUBLICATIONS/siam04.pdf},
  year = 2004
}

@book{0387954333,
  address = {Berlin},
  asin = {0387954333},
  author = {Weiss, Sholom M. and Indurkhya, Nitin and Zhang, T.},
  dewey = {006.312},
  ean = {9780387954332},
  edition = 1,
  interhash = {d75b9da07cf40d54a79e6d8995f78a31},
  intrahash = {6ac07561b543e6033fd4c9811d0dccad},
  isbn = {0387954333},
  publisher = {Springer},
  title = {Text Mining. Predictive Methods for Analyzing Unstructured Information},
  url = {http://www.amazon.de/gp/redirect.html%3FASIN=0387954333%26tag=ws%26lcode=xm2%26cID=2025%26ccmID=165953%26location=/o/ASIN/0387954333%253FSubscriptionId=13CT5CVB80YFWJEPWS02},
  year = 2004
}

@article{hotho2007mining,
  author = {Hotho, Andreas and Stumme, Gerd},
  interhash = {26915a205b66368931821165ecaf972c},
  intrahash = {92d3a5fdd786086fa12787e3e350b6af},
  journal = {Künstliche Intelligenz},
  number = 3,
  pages = {5--8},
  title = {Mining the World Wide Web},
  url = {http://kobra.bibliothek.uni-kassel.de/bitstream/urn:nbn:de:hebis:34-2008021320337/3/HothoStummeMiningWWW.pdf},
  vgwort = {20},
  year = 2007
}

@book{Berendt2007,
  abstract = {This book constitutes the refereed proceedings of the Workshop on Web Mining, WebMine 2006, held in Berlin, Germany, September 18th, 2006. Topics included are data mining based on analysis of bloggers and tagging, web mining, XML mining and further techniques of knowledge discovery. The book is especially valuable for those interested in the aspects of the Social Web (Web 2.0) and its inherent dynamic and diversity of user-generated content.},
  editor = {Berendt, B. and Hotho, A. and Mladenic, D. and Semeraro, G.},
  eventdate = {2006-09-18},
  interhash = {44843d4fe175e66198b58137dd924f44},
  intrahash = {8aa8d9bcbb5a5bb3fc480d1e53b27236},
  isbn = {978-3-540-74950-9},
  location = {Berlin, Germany},
  publisher = {Springer},
  series = {LNCS},
  title = {From Web to Social Web: Discovering and Deploying User and Content Profiles},
  url = {http://www.springer.com/dal/home?SGWID=1-102-22-173759307-0&changeHeader=true&referer=www.springeronline.com&SHORTCUT=www.springer.com/978-3-540-74950-9},
  vgwort = {279},
  volume = 4736,
  year = 2007
}

@article{1276056,
  address = {Amsterdam, The Netherlands},
  author = {Wurst, Michael and Morik, Katharina},
  doi = {10.1016/j.future.2006.04.004},
  interhash = {591e4b107d4a5cafc0cf49e07fad72e0},
  intrahash = {e5eba80e58b4532a3fd3bcf50994734e},
  issn = {0167-739X},
  journal = {Future Gener. Comput. Syst.},
  number = 1,
  pages = {69--75},
  publisher = {Elsevier Science Publishers B. V.},
  title = {Distributed feature extraction in a p2p setting: a case study},
  url = {http://portal.acm.org/citation.cfm?id=1276056},
  volume = 23,
  year = 2007
}

@inproceedings{hotho02textws,
  author = {Hotho, Andreas and Maedche, Alexander and Staab, Steffen and Zacharias, Valentin},
  booktitle = {Proc. of Text Mining Workshop},
  interhash = {a8e24d64d26ca5b681a2c29e005cbcc2},
  intrahash = {56ae0afc93999014629b06cc958f6a04},
  title = {On Knowledgeable Unsupervised Text Mining},
  url = {http://www.aifb.uni-karlsruhe.de/WBS/aho/pub/txt_mining_ws_2002.pdf},
  year = 2002
}

@proceedings{berendt05european,
  editor = {Berendt, Bettina and Hotho, Andreas and Mladenic, Dunja and Semeraro, Giovanni and Spiliopoulou, Myra and Stumme, Gerd and van Someren, Maarten},
  interhash = {6dfd547a42a7bd5ccacce75ea8875704},
  intrahash = {f306e43da22adede0286917d5d83eb3b},
  publisher = {Workshop at the 16th Europ. Conf. on Machine Learning (ECML'05) / 9th Europ. Conf. on Principles and Practice of Knowledge Discovery in Databases (PKDD'05)},
  title = {Proc. of the European Web Mining Forum 2005},
  year = 2005
}

@proceedings{msw2004,
  editor = {Hotho, Andreas and Sure, York and Getoor, Lise},
  title = {International Workshop on Mining for and from the Semantic Web (MSW2004)},
  month = AUG,
  year = 2004,
  note = {located at the 10th International ACM SIGKDD Conference on Knowledge Discovery and Data Mining KDD 2004, 22nd August 2004, Seattle, WA, USA},
  url = {http://www.kde.cs.uni-kassel.de/hotho/pub/2004/msw2004_proceedings.pdf},
  interhash = {0e2afdbc572d2f6c19c60559919931c1},
  intrahash = {0fbd582960b0f6917d82bd1825c3d167}
}

@proceedings{WS_BHS02,
  address = {Helsinki},
  booktitle = {Proc. of the Semantic Web Mining Workshop},
  day = 20,
  editor = {Berendt, B. and Hotho, A. and Stumme, G.},
  interhash = {ad1bb62c9ae700cf0dc181ba42ee2b8f},
  intrahash = {f5bfd0060d08ec6aefea23c4add8da51},
  isbn = {3-540-41066-X},
  month = aug,
  publisher = {Workshop at 13th Europ. Conf. on Machine Learning (ECML'02) / 6th Europ. Conf. on Principles and Practice of Knowledge Discovery in Databases (PKDD'02)},
  title = {Semantic Web Mining},
  year = 2002
}

@proceedings{stumme_semwebmine_ws01,
  address = {Freiburg},
  booktitle = {Proc. of the Semantic Web Mining Workshop},
  day = 3,
  editor = {Stumme, G. and Hotho, A. and Berendt, B.},
  interhash = {ba288488b822fd20d4d7d947f8036360},
  intrahash = {604cb78986cc5596b4c336b9c5543add},
  month = sep,
  publisher = {Workshop at 12th Europ. Conf. on Machine Learning (ECML'01) / 5th Europ. Conf. on Principles and Practice of Knowledge Discovery in Databases (PKDD'01)},
  title = {Semantic Web Mining},
  year = 2001
}

@proceedings{berendt2006webmining,
  editor = {Berendt, B. and Hotho, A. and Mladenic, D. and Semeraro, G.},
  title = {Workshop on Web Mining 2006 (WebMine)},
  year = 2006,
  url = {http://www.kde.cs.uni-kassel.de/ws/webmine2006/pdf/WebMine2006.pdf},
  interhash = {5e4e5a7285303d11a7f967495fe97059},
  intrahash = {ecab780995c630270857b610c7c54e67}
}

@inproceedings{trias2006jaeschke,
  address = {Hong Kong},
  author = {Jäschke, Robert and Hotho, Andreas and Schmitz, Christoph and Ganter, Bernhard and Stumme, Gerd},
  booktitle = {Proc. 6th ICDM conference},
  doi = {10.1109/ICDM.2006.162},
  interhash = {b4964c3bdd2991a80873d7080ef6a73e},
  intrahash = {f9a6e80c21b1266491d2509ca0b88eea},
  isbn = {0-7695-2701-9},
  issn = {1550-4786},
  month = dec,
  title = {{TRIAS} - An Algorithm for Mining Iceberg Tri-Lattices},
  vgwort = {19},
  year = 2006
}

@book{Berendt2004Web,
  editor = {Berendt, Bettina and Hotho, Andreas and Mladenic, Dunja and van Someren, Maarten and Spiliopoulou, Myra and Stumme, Gerd},
  title = {Web Mining: From Web to Semantic Web},
  series = {LNAI},
  volume = 3209,
  publisher = {Springer},
  address = {Heidelberg},
  year = 2004,
  isbn = {3-540-23258-3},
  price = {EUR 38,52},
  interhash = {f5223cd62ce3c9d458d4cb5721880a0e},
  intrahash = {495f8565cdda14bbbe197d11425f0fe8}
}

@inproceedings{berendt05semantic,
  author = {Berendt, Bettina and Hotho, Andreas and Stumme, Gerd},
  title = {Semantic Web Mining and the Representation, Analysis, and Evolution of Web Space},
  booktitle = {Proc. of the 1st Intl. Workshop on Representation and Analysis of Web Space},
  editor = {Svatek, Vojtech and Snasel, Vaclav},
  pages = {1--16},
  publisher = {Technical University of Ostrava},
  year = 2005,
  isbn = {80-248-0864-1},
  url = {http://ftp.informatik.rwth-aachen.de/Publications/CEUR-WS/Vol-164/raws2005-paper1.pdf},
  vgwort = {29},
  interhash = {e4b853ff13b63a88b009610890b89348},
  intrahash = {f8826ba2790eeb857dd4becb31a08225}
}

@article{kostoff,
  abstract = {Literature-related discovery (LRD) is the linking of two or more literature concepts that have heretofore not been linked (i.e., disjoint), in order to produce novel, interesting, plausible, and intelligible knowledge (i.e., potential discovery). The open discovery systems (ODS) component of LRD starts with a problem to be solved, and generates solutions to that problem through potential discovery. We have been using ODS LRD to identify potential treatments or preventative actions for challenging medical problems, among myriad other applications. This paper describes the second medical problem we addressed (cataract) using ODS LRD; the first problem addressed was Raynaud's Phenomenon (RP), and was described in the third paper of this Special Issue. Cataract was selected because it is ubiquitous globally, appears intractable to all forms of treatment other than surgical removal of cataracts, and is a major cause of blindness in many developing countries. The ODS LRD study had three objectives: a) identify non-drug non-surgical treatments that would 1) help prevent cataracts, or 2) reduce the progression rate of cataracts, or 3) stop the progression of cataracts, or 4) maybe even reverse the progression of cataracts; b) demonstrate that we could solve an ODS LRD problem with no prior knowledge of any results or prior work (unlike the case with the RP problem); c) determine whether large time savings in the discovery process were possible relative to the time required for conducting the RP study. To that end, we used the MeSH taxonomy of MEDLINE to restrict potential discoveries to selected semantic classes, as a substitute for the manually-intensive process used in the RP study to restrict potential discoveries to selected semantic classes. We also used additional semantic filtering to identify potential discovery within the selected semantic classes. All these goals were achieved.
As will be shown, we generated large amounts of potential discovery in more than an order of magnitude less time than required for the RP study. We identified many non-drug non-surgical treatments that may be able to reduce or even stop the progression rate of cataracts. Time, and much testing, will determine whether this is possible. Finally, the methodology has been developed to the point where ODS LRD problems can be solved with no results or knowledge of any prior work.},
  author = {Kostoff, Ronald N.},
  interhash = {45ce0cd73dd62182ce1e447ba9fe71eb},
  intrahash = {b9359f79985da9b9677340ffda849e74},
  journal = {Technological Forecasting and Social Change},
  note = {In Press, Corrected Proof},
  title = {Literature-related discovery (LRD): Potential treatments for cataracts},
  url = {http://www.sciencedirect.com/science/article/B6V71-4RDB8SC-9/2/8991fe8968a0ef12f22ed7e9ac9d7c4f},
  year = 2007
}

@inproceedings{conf/icdm/PopesculULP03,
  author = {Popescul, Alexandrin and Ungar, Lyle H. and Lawrence, Steve and Pennock, David M.},
  booktitle = {ICDM},
  crossref = {conf/icdm/2003},
  ee = {http://csdl.computer.org/comp/proceedings/icdm/2003/1978/00/19780275abs.htm},
  interhash = {3bcb76c6628b1752db555f86fe39429e},
  intrahash = {7cdd6b0791fcdf17ec6d404b55f12c5c},
  isbn = {0-7695-1978-4},
  pages = {275--282},
  publisher = {IEEE Computer Society},
  timestamp = {2004-01-28},
  title = {Statistical Relational Learning for Document Mining},
  url = {http://www.cis.upenn.edu/~popescul/Publications/popescul03dm.pdf},
  year = 2003
}

@inproceedings{feldman95KDT,
  author = {Feldman, R. and Dagan, I.},
  booktitle = {Proc. of the First Int. Conf. on Knowledge Discovery (KDD)},
  interhash = {15f076596b35048463f828687410ea30},
  intrahash = {d1bb2e8dff9bd80da158b4b770685dce},
  key = {feldman95KDT},
  label = {KDT - Knowledge Discovery in Texts},
  pages = {112--117},
  title = {Knowledge Discovery in Textual Databases (KDT)},
  type = {InProceedings},
  year = 1995
}

@book{baldi03modelling,
  abstract = {Modeling the Internet and the Web covers the most important aspects of modeling the Web using a modern mathematical and probabilistic treatment. It focuses on the information and application layers, as well as some of the emerging properties of the Internet.  Provides a comprehensive introduction to the modeling of the Internet and the Web at the information level.  Takes a modern approach based on mathematical, probabilistic, and graphical modeling.  Provides an integrated presentation of theory, examples, exercises and applications.  Covers key topics such as text analysis, link analysis, crawling techniques, human behaviour, and commerce on the Web. Interdisciplinary in nature, Modeling the Internet and the Web will be of interest to students and researchers from a variety of disciplines including computer science, machine learning, engineering, statistics, economics, business, and the social sciences.},
  author = {Baldi, Pierre and Frasconi, Paolo and Smyth, Padhraic},
  booktitle = {Modeling the Internet and the Web: Probabilistic Methods and Algorithms},
  citeulike-article-id = {822915},
  interhash = {416f2405193ae7d30cffe673dee89df2},
  intrahash = {3e4e2899e7d6988218d02a264bcfe24a},
  month = apr,
  priority = {2},
  publisher = {Wiley},
  title = {Modeling the Internet and the Web: Probabilistic Methods and Algorithms},
  url = {http://eu.wiley.com/WileyCDA/WileyTitle/productCd-0470849061.html},
  year = 2003
}

@proceedings{DBLP:conf/kdd/1999web,
  editor = {Masand, Brij M. and Spiliopoulou, Myra},
  title = {Web Usage Analysis and User Profiling, International WEBKDD'99
               Workshop, San Diego, California, USA, August 15, 1999, Revised
               Papers},
  booktitle = {WEBKDD},
  series = {Lecture Notes in Computer Science},
  volume = 1836,
  publisher = {Springer},
  year = 2000,
  isbn = {3-540-67818-2},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  interhash = {29a69416c66bd604c4599009915dc0b0},
  intrahash = {18a9697e8ca04f637487e79b6be9cc83}
}

@book{books/mk/Pyle99,
  author = {Pyle, Dorian},
  interhash = {3edec307e8a02fa778ee847eccfb4215},
  intrahash = {29f6bc4833269393dabf92bae3afa905},
  isbn = {1-55860-529-0},
  publisher = {Morgan Kaufmann},
  timestamp = {2002-01-28},
  title = {Data Preparation for Data Mining},
  year = 1999
}

@inproceedings{conf/das/SchenkerBLK04,
  author = {Schenker, Adam and Bunke, Horst and Last, Mark and Kandel, Abraham},
  booktitle = {Document Analysis Systems},
  crossref = {conf/das/2004},
  editor = {Marinai, Simone and Dengel, Andreas},
  ee = {http://springerlink.metapress.com/openurl.asp?genre=article&issn=0302-9743&volume=3163&spage=401},
  interhash = {83ba06e8918a227fb2345e047e40f619},
  intrahash = {4450261ce5af13db99ce208800dff22c},
  isbn = {3-540-23060-2},
  pages = {401--412},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  timestamp = {2005-01-05},
  title = {A Graph-Based Framework for Web Document Mining},
  url = {http://dblp.uni-trier.de/db/conf/das/das2004.html#SchenkerBLK04},
  volume = 3163,
  year = 2004
}

@book{keyhere,
  asin = {9812563393},
  author = {Schenker, Adam and Bunke, Horst and Last, Mark and Kandel, Abraham},
  interhash = {247e95a6025dff9119c7943b5a33f917},
  intrahash = {3f9897fc8abcf1bcb1fd0212a23a4134},
  isbn = {9812563393},
  publisher = {World Scientific},
  title = {Graph-Theoretic Techniques for Web Content Mining},
  typesource = {Simple CitationSource},
  url = {http://www.amazon.ca/Graph-Theoretic-Techniques-Web-Content-Mining/dp/9812563393/ref=sr_1_7/701-3503486-7337153?ie=UTF8&s=books&qid=1175673405&sr=1-7},
  year = 2005
}

@inproceedings{conf/kdd/BorgesL98,
  author = {Borges, José and Levene, Mark},
  booktitle = {KDD},
  interhash = {c231f04d16109f3e336deb073fe1fb5b},
  intrahash = {db40027e6c7e7a90ca72cc860208f7ab},
  pages = {149--153},
  timestamp = {2002-12-17},
  title = {Mining Association Rules in Hypertext Databases},
  url = {http://paginas.fe.up.pt/~jlborges/publications/arhtKDD98.ps},
  year = 1998
}

@inproceedings{conf/dawak/MadriaBNL99,
  author = {Madria, Sanjay Kumar and Bhowmick, Sourav S. and Ng, Wee Keong and Lim, Ee-Peng},
  booktitle = {DaWaK},
  crossref = {conf/dawak/1999},
  editor = {Mohania, Mukesh K. and Tjoa, A. Min},
  ee = {http://link.springer.de/link/service/series/0558/bibs/1676/16760303.htm},
  interhash = {706390429ca5bf6e33d2fda37f956922},
  intrahash = {1016d42a26ca0748ba227d62b7126933},
  isbn = {3-540-66458-0},
  pages = {303--312},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  timestamp = {2002-03-05},
  title = {Research Issues in Web Data Mining},
  url = {http://dblp.uni-trier.de/db/conf/dawak/dawak99.html#MadriaBNL99},
  volume = 1676,
  year = 1999
}

@article{journals/cacm/Etzioni96,
  author = {Etzioni, Oren},
  cdrom = {CACMs1/CACM3911/P0065.pdf},
  cite = {conf/kdd/ZaianeH95},
  doi = {10.1145/240455.240473},
  ee = {db/journals/cacm/Etzioni96.html},
  interhash = {5c3111490c1f8a998d8d5f974c8afe65},
  intrahash = {a8955e592d1eed82fdf43314241b5423},
  journal = {Commun. ACM},
  number = 11,
  pages = {65--68},
  timestamp = {2003-11-20},
  title = {The World-Wide Web: Quagmire or Gold Mine?},
  url = {http://doi.acm.org/10.1145/240455.240473},
  volume = 39,
  year = 1996
}

@inproceedings{content-only,
  author = {Cooley, R. and Mobasher, B. and Srivastava, J.},
  booktitle = {Proceedings of the Ninth IEEE International Conference on Tools with Artificial Intelligence (ICTAI'97)},
  interhash = {94895d7c0cc214ed623d941b2dab7367},
  intrahash = {e385cc03235ad1efc751e12fb2fd11d0},
  location = {Newport Beach, CA},
  month = nov,
  publisher = {IEEE Computer Society},
  title = {Web Mining: Information and Pattern Discovery on the World Wide Web},
  url = {http://maya.cs.depaul.edu/~mobasher/papers/webminer-tai97.ps},
  year = 1997
}

@article{jws2006Semantic,
  abstract = {Semantic Web Mining aims at combining the two fast-developing research areas Semantic Web and Web Mining.
This survey analyzes the convergence of trends from both areas: an increasing number of researchers is working on
improving the results of Web Mining by exploiting semantic structures in the Web, and they make use of Web Mining
techniques for building the Semantic Web. Last but not least, these techniques can be used for mining the Semantic
Web itself.
The Semantic Web is the second-generation WWW, enriched by machine-processable information which supports
the user in his tasks. Given the enormous size even of today’s Web, it is impossible to manually enrich all of
these resources. Therefore, automated schemes for learning the relevant information are increasingly being used.
Web Mining aims at discovering insights about the meaning of Web resources and their usage. Given the primarily
syntactical nature of the data being mined, the discovery of meaning is impossible based on these data only. Therefore,
formalizations of the semantics of Web sites and navigation behavior are becoming more and more common.
Furthermore, mining the Semantic Web itself is another upcoming application. We argue that the two areas Web
Mining and Semantic Web need each other to fulfill their goals, but that the full potential of this convergence is not
yet realized. This paper gives an overview of where the two areas meet today, and sketches ways of how a closer
integration could be profitable.},
  author = {Stumme, Gerd and Hotho, Andreas and Berendt, Bettina},
  interhash = {3fd4efcf649ab35e8ef001f19b7ff83c},
  intrahash = {9937253e6b2591267a0596fa597a4b96},
  issn = {1570-8268},
  journal = {Journal of Web Semantics},
  number = 2,
  pages = {124--143},
  publisher = {Elsevier},
  title = {Semantic Web Mining - State of the Art and Future Directions},
  url = {http://www.kde.cs.uni-kassel.de/hotho/pub/2006/JWS2006SemanticWebMining.pdf},
  vgwort = {74},
  volume = 4,
  year = 2006
}

@book{Semantic2006Ackermann,
  doi = {10.1007/11908678},
  editor = {Ackermann, Markus and Berendt, Bettina and Grobelnik, Marko and Hotho, Andreas and Mladenic, Dunja and Semeraro, Giovanni and Spiliopoulou, Myra and Stumme, Gerd and Svatek, Vojtech and van Someren, Maarten},
  interhash = {f4891d2967571b77f1276298f112158f},
  intrahash = {fa07206128a5844d06488cfe93b6e22c},
  isbn13 = {978-3-540-47697-9},
  issn = {0302-9743},
  publisher = {Springer},
  title = {Semantics, Web and Mining},
  url = {http://dx.doi.org/10.1007/11908678},
  year = 2006
}

@article{wikipediaxml:2005,
  author = {Denoyer, Ludovic and Gallinari, Patrick},
  interhash = {0e9b9afb15804d3e625d73ada85900b1},
  intrahash = {493b849942fcaf9ba8e8e68e3cb46d38},
  journal = {SIGIR Forum},
  title = {The {Wikipedia} {XML} Corpus},
  url = {http://www-connex.lip6.fr/~denoyer/wikipediaXML/},
  year = 2006
}

@book{web2006witten,
  author = {Witten, Ian and Gori, Marco and Numerico, Teresa},
  interhash = {8e54756085b574381d8f35d20dc989c7},
  intrahash = {26dd2b2627dc5ee6100840328e0c20f0},
  isbn = {0-12-370609-2},
  publisher = {Morgan Kaufmann},
  title = {Web Dragons: Inside the Myths of Search Engine Technology},
  year = 2006
}

@article{han98hypergraph,
  author = {Han, Eui-Hong and Karypis, George and Kumar, Vipin and Mobasher, Bamshad},
  interhash = {3bb7fb3fd3af41fac2db5460a5acfd2c},
  intrahash = {9723b092d975dedb8f6d5f711bb00ffd},
  journal = {Data Engineering Bulletin},
  number = 1,
  pages = {15--22},
  title = {Hypergraph Based Clustering in High-Dimensional Data Sets: A Summary of Results},
  url = {http://citeseer.ist.psu.edu/han98hypergraph.html},
  volume = 21,
  year = 1998
}

@inproceedings{awic2003,
  author = {Oberle, Daniel and Berendt, Bettina and Hotho, Andreas and Gonzalez, Jorge},
  booktitle = {Advances in Web Intelligence, First International Atlantic Web Intelligence Conference, AWIC 2003, Madrid, Spain, May 5-6, 2003, Proceedings},
  editor = {Ruiz, Ernestina Menasalvas and Segovia, Javier and Szczepaniak, Piotr S.},
  interhash = {f55b03b7f6aa768c16e09abe349238b2},
  intrahash = {0e850231f9b08acb97a39c18e6591526},
  pages = {142--154},
  publisher = {Springer},
  series = {Lecture Notes in Artificial Intelligence},
  title = {Conceptual User Tracking},
  url = {http://www.aifb.uni-karlsruhe.de/WBS/dob/pubs/awic2003.pdf},
  volume = 2663,
  year = 2003
}

@article{bloehdorn-etal-ldv-2005,
  author = {Bloehdorn, Stephan and Cimiano, Philipp and Hotho, Andreas and Staab, Steffen},
  interhash = {174605ce734930cbdffb624175e845f9},
  intrahash = {0abe60273dec207b626c41f431b16b49},
  issn = {0175-1336},
  journal = {LDV Forum - GLDV Journal for Computational Linguistics and Language Technology},
  month = may,
  number = 1,
  pages = {87--112},
  title = {An Ontology-based Framework for Text Mining},
  vgwort = {26},
  volume = 20,
  year = 2005
}

@article{hotho-etal-ldv-2005,
  author = {Hotho, Andreas and Nürnberger, Andreas and Paaß, Gerhard},
  interhash = {a324706344ddfce8a288870adeef18cb},
  intrahash = {6ecc8a3cee1a99bbb9f8f8dd6a9d2959},
  issn = {0175-1336},
  journal = {LDV Forum - GLDV Journal for Computational Linguistics and Language Technology},
  month = may,
  number = 1,
  pages = {19--62},
  title = {A Brief Survey of Text Mining},
  url = {http://www.kde.cs.uni-kassel.de/hotho/pub/2005/hotho05TextMining.pdf},
  vgwort = {44},
  volume = 20,
  year = 2005
}

@misc{Newman+Hettich+Blake+Merz:1998,
  author = {Newman, D. J. and Hettich, S. and Blake, C. L. and Merz, C. J.},
  institution = {University of California, Irvine, Dept. of Information and Computer Sciences},
  interhash = {325a6b5ac5a1f044943d4298c21d31dd},
  intrahash = {85308db3df761f63f16a7cab4eb8d4aa},
  title = {{UCI} Repository of machine learning databases},
  url = {http://www.ics.uci.edu/~mlearn/MLRepository.html},
  year = 1998
}

@inproceedings{conf/sigmod/NgLHP98,
  author = {Ng, Raymond T. and Lakshmanan, Laks V. S. and Han, Jiawei and Pang, Alex},
  booktitle = {SIGMOD Conference},
  cdrom = {SIGMOD98/P013.PDF},
  cite = {conf/sigmod/AgrawalIS93},
  ee = {db/conf/sigmod/NgLHP98.html},
  interhash = {c4e73bae8e22a39d15d022631c69ddbf},
  intrahash = {72825e6a12b3285349fb64c1020383c0},
  pages = {13--24},
  title = {Exploratory Mining and Pruning Optimizations of Constrained Association Rules},
  url = {http://dblp.uni-trier.de/db/conf/sigmod/sigmod98.html#NgLHP98},
  year = 1998
}

@inproceedings{672836,
  author = {Agrawal, Rakesh and Srikant, Ramakrishnan},
  title = {Fast Algorithms for Mining Association Rules in Large Databases},
  booktitle = {VLDB '94: Proceedings of the 20th International Conference on Very Large Data Bases},
  pages = {487--499},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address = {San Francisco, CA, USA},
  year = 1994,
  isbn = {1-55860-153-8},
  interhash = {960c924ccbe1ff429a30f7433ec53122},
  intrahash = {cce11d670329a38a90f625b8005dfb8d}
}

@inproceedings{park1995ehb,
  address = {New York, NY, USA},
  author = {Park, J. S. and Chen, M. S. and Yu, P. S.},
  booktitle = {Proceedings of the 1995 ACM SIGMOD international conference on Management of data},
  interhash = {e7a28762e92ab579ed3f99c565848f9a},
  intrahash = {094af08c931c876e20fd0e1e5086583b},
  pages = {175--186},
  publisher = {ACM Press},
  title = {An effective hash-based algorithm for mining association rules},
  year = 1995
}

@book{citeulike:340715,
  author = {Witten, Ian H. and Frank, Eibe},
  citeulike-article-id = {340715},
  edition = {Second},
  howpublished = {Paperback},
  interhash = {6d7648942e58f322a39a1e8c37edfec6},
  intrahash = {57ade2d873735d4c54d44365dafa7605},
  isbn = {0120884070},
  month = jun,
  priority = {0},
  publisher = {Morgan Kaufmann},
  series = {Morgan Kaufmann Series in Data Management Sys},
  title = {Data Mining: Practical Machine Learning Tools and Techniques},
  url = {http://www.amazon.fr/exec/obidos/ASIN/0120884070/citeulike04-21},
  year = 2005
}

@inproceedings{conf/ecml/DrostS05,
  author = {Drost, Isabel and Scheffer, Tobias},
  booktitle = {ECML},
  doi = {10.1007/11564096_14},
  ee = {http://dx.doi.org/10.1007/11564096_14},
  interhash = {813e48f0b04788c76ec4c9500cc5f8a0},
  intrahash = {243303d890cd0f999d78b1a7e148e38c},
  pages = {96--107},
  title = {Thwarting the Nigritude Ultramarine: Learning to Identify Link Spam},
  url = {http://dblp.uni-trier.de/db/conf/ecml/ecml2005.html#DrostS05},
  year = 2005
}

@book{books/mit/FayyadPSU96,
  editor = {Fayyad, Usama M. and Piatetsky-Shapiro, Gregory and Smyth, Padhraic and Uthurusamy, Ramasamy},
  interhash = {c11811ccd720de5dad0ffea4741725f0},
  intrahash = {3553c3acc971c03813352c40afe7476a},
  isbn = {0-262-56097-6},
  publisher = {AAAI/MIT Press},
  title = {Advances in Knowledge Discovery and Data Mining},
  url = {http://www.amazon.com/gp/product/0262560976},
  year = 1996
}

@inproceedings{conf/kdd/LiuHM98,
  author = {Liu, Bing and Hsu, Wynne and Ma, Yiming},
  booktitle = {KDD},
  ee = {http://portal.acm.org/citation.cfm?id=502512.502560},
  interhash = {7a6df9139732cad1130e5ab4bd1cb203},
  intrahash = {76bb213b1e4c8b14fe7e4f4d2569a4af},
  pages = {80--86},
  title = {Integrating Classification and Association Rule Mining},
  url = {http://www.comp.nus.edu.sg/~dm2/publications/kdd98_1.ps},
  year = 1998
}

@article{1117456,
  address = {New York, NY, USA},
  author = {Getoor, Lise and Diehl, Christopher P.},
  doi = {10.1145/1117454.1117456},
  interhash = {d71ff8114c8b062f36d58ca649bc2a04},
  intrahash = {ac02f1d7dea7a106bc4103c8a9ec4aef},
  journal = {SIGKDD Explorations},
  number = 2,
  pages = {3--12},
  publisher = {ACM Press},
  title = {Link mining: a survey},
  url = {http://www.cpdiehl.org/lmsurvey.pdf},
  volume = 7,
  year = 2005
}

@inproceedings{larsenkdd99,
  author = {Larsen, B. and Aone, Ch.},
  booktitle = {Proceedings of the 5th International Conference on Knowledge Discovery and Data Mining, KDD 1999},
  interhash = {03b9c1db32172b54d58716e324e63511},
  intrahash = {de4dd4dfe60be5a81b6efb51e2918222},
  pages = {16--22},
  publisher = {ACM},
  title = {Fast and Effective Text Mining Using Linear-time Document Clustering},
  year = 1999
}

@article{citeulike:171426,
  author = {Adomavicius, G. and Tuzhilin, A.},
  citeulike-article-id = {171426},
  interhash = {42f7653127a823354d000ea95cf804be},
  intrahash = {55294392edb717922798725dd8be80b3},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  keywords = {collaborative collaborative-filtering filtering mining personalization recommender recommender-systems systems},
  number = 6,
  pages = {734--749},
  priority = {2},
  title = {Toward the Next Generation of Recommender Systems: A Survey of the State-of-the-Art and Possible Extensions},
  url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1423975},
  volume = 17,
  year = 2005
}

@inproceedings{stumme02Usage,
  author = {Stumme, Gerd and Berendt, Bettina and Hotho, Andreas},
  booktitle = {Next Generation Data Mining. Proc. NSF Workshop, Baltimore, Nov. 2002},
  interhash = {479de77764be1ec66534be1c647e0857},
  intrahash = {9e423442a05fc82d602339f7f930dc2e},
  pages = {77--86},
  title = {Usage Mining for and on the Semantic Web},
  url = {http://www.aifb.uni-karlsruhe.de/WBS/gst/papers/2002/NSF-NGDM02.pdf},
  year = 2002
}

@inproceedings{hartmann02Semantic,
  author = {Hartmann, Jens and Hotho, Andreas and Stumme, Gerd},
  booktitle = {Proc. Arbeitskreistreffen Knowledge Discovery, Oldenburg, Sept. 2002},
  interhash = {c07545febc9e7b32803bf33547ec9004},
  intrahash = {9e17d23f1b119ff2a70e2837b2ca8d89},
  pages = {34--38},
  title = {Semantic Web Mining for Building Information Portals (Position Paper)},
  year = 2002
}

@inproceedings{semweb-mining-iswc02,
  address = {Sardinia, Italy},
  author = {Berendt, B. and Hotho, A. and Stumme, G.},
  booktitle = {Proceedings of the First International Semantic Web Conference: The Semantic Web ({ISWC} 2002)},
  editor = {Horrocks, I. and Hendler, J. A.},
  interhash = {4dd40c50089d3b86fb235bfaf3c8bee7},
  intrahash = {5eb991563ce9a51f3fa5786436406ba8},
  pages = {264--278},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science (LNCS)},
  title = {Towards Semantic Web Mining},
  volume = 2342,
  year = 2002
}

@article{hotho_jucs01,
  author = {Hotho, A. and Maedche, A. and Staab, S. and Studer, R.},
  interhash = {0f997271cdc6b639ca196fc54b7d277e},
  intrahash = {b9a6e840ccef27311575ed33d40bcc53},
  journal = {Journal of Universal Computer Science (J.UCS)},
  number = 7,
  pages = {566--590},
  title = {{SEAL-II} --- The Soft Spot between Richly Structured and Unstructured Knowledge},
  volume = 7,
  year = 2001
}

@article{DBLP:journals/sigkdd/HothoSG04,
  author = {Hotho, Andreas and Sure, York and Getoor, Lise},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee = {http://doi.acm.org/10.1145/1046482},
  interhash = {02a216da151c9bd84ec4c131e1a43f89},
  intrahash = {1aae46b03f32f0d69caa735abe81825e},
  journal = {SIGKDD Explorations},
  number = 2,
  pages = {142--143},
  title = {A workshop report: mining for and from the {Semantic Web} at {KDD} 2004},
  volume = 6,
  year = 2004
}

@inproceedings{Kohavi01,
  address = {New York},
  author = {Kohavi, R.},
  booktitle = {KDD 2001 -- Proceedings of the Seventh ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, San Francisco, CA, August 26--29, 2001},
  interhash = {f69e8c723e050ca382741ab9f7b5922b},
  intrahash = {e4c7b91e2f7b4f2cde75b48bf3ab2a0e},
  month = aug,
  pages = {8--13},
  publisher = {ACM},
  title = {Mining E-Commerce Data: The Good, the Bad, and the Ugly},
  year = 2001
}

@incollection{Borges99,
  author = {Borges, J. L. and Levene, M.},
  title = {Data mining of user navigation patterns},
  booktitle = {\cite{webkdd99book}},
  pages = {92--111},
  year = 2000,
  isbn = {3-540-43760-6},
  interhash = {069643ae58ff3572ec20a18a383d3085},
  intrahash = {aea83f9f73682beaa12066e4d28e392b}
}

@article{Cooleyetal99,
  author = {Cooley, R. and Mobasher, B. and Srivastava, J.},
  title = {Data preparation for mining world wide web browsing patterns},
  journal = {Journal of Knowledge and Information Systems},
  volume = 1,
  number = 1,
  pages = {5--32},
  year = 1999,
  location = {Santa Barbara, CA},
  interhash = {68b1e11110e6498699524008fe67f8c1},
  intrahash = {e515dc2a8adbc7fa84b7fe968b61391e}
}

@article{Srivastavaetal,
  author = {Srivastava, J. and Cooley, R. and Deshpande, M. and Tan, P.-N.},
  title = {Web usage mining: discovery and application of usage patterns from web data},
  journal = {SIGKDD Explorations},
  volume = 1,
  number = 2,
  pages = {12--23},
  year = 2000,
  location = {Santa Barbara, CA},
  url = {http://citeseer.nj.nec.com/srivastava00web.html},
  interhash = {08571943908ec1aa9aa5c003e79d5b8d},
  intrahash = {dc941da0f5c7da937269241b0df0b3b3}
}

@book{HanKamber01,
  address = {San Francisco, CA},
  author = {Han, Jiawei and Kamber, Micheline},
  interhash = {f902c324cdc1b270bdf9d996ba19dca7},
  intrahash = {b9884cf23229d6cb71535484424be1ff},
  publisher = {Morgan Kaufmann},
  title = {Data Mining. Concepts and Techniques},
  year = 2001
}

@inproceedings{leeicdm2001,
  author = {Lee, Jung-Won and Lee, Kiho and Kim, Won},
  booktitle = {Proceedings of the 2001 IEEE International Conference on Data
               Mining, 29 November - 2 December 2001, San Jose, California,
               USA},
  editor = {Cercone, Nick and Lin, Tsau Young and Wu, Xindong},
  interhash = {23dd8c004ec7cd9a380101edcfc7c31b},
  intrahash = {e8fc4f311c34c1007c794379c1629d73},
  pages = {345--352},
  publisher = {IEEE Computer Society},
  title = {Preparations for Semantics-Based XML Mining},
  year = 2001
}

@incollection{SrivastavaetalNGDM,
  author = {Srivastava, J. and Desikan, P. and Kumar, V.},
  title = {Web Mining -- Concepts, Applications \& Research Directions},
  booktitle = {\cite{NGDM-book}},
  year = 2003,
  isbn = {90-74821-43-X},
  interhash = {840c6d7403c6c0c62414de7b5112fb83},
  intrahash = {1dc5a9c7e27b52924e2a6eb06732fd19}
}

@book{BerryLinoffWebMining,
  address = {New York},
  author = {Linoff, G. S. and Berry, M. J. A.},
  interhash = {da5f8a53ac15a7efdcefceb04adbd5af},
  intrahash = {0b23e0a44cf267b346c67240a86b308a},
  isbn = {90-74821-43-X},
  publisher = {John Wiley \& Sons},
  title = {Mining the Web. Transforming Customer Data into Customer Value},
  year = 2001
}

@article{ZaianeSimoff03,
  author = {Zaïane, O. R. and Simoff, S. J.},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  interhash = {38ce81529e3b7527655b6fa0049926ea},
  intrahash = {7a6bf1f7aed80780a5b73db192d3bed5},
  journal = {SIGKDD Explorations},
  number = 2,
  publisher = {ACM},
  title = {MDM/KDD: Multimedia data mining for the second time},
  volume = 3,
  year = 2003
}

@inproceedings{Simoff00,
  author = {Simoff, S. J.},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  booktitle = {Proceedings of the MDM/KDD2000 Workshop on Multimedia Data Mining},
  editor = {Simoff, S. J. and Zaïane, O. R.},
  interhash = {fcabb55db4171c241c2fa33bec8d3998},
  intrahash = {f35304fac130bf0acf880d7141c65e7e},
  pages = {104--109},
  title = {Variations on multimedia data mining},
  url = {http://www.cs.ualberta.ca/~zaiane/mdm_kdd2000/mdm00-15.pdf},
  year = 2000
}

@inproceedings{WS_gs01,
  author = {Le Grand, B. and Soto, M.},
  booktitle = {\cite{WS_SHB01}},
  interhash = {8f0a73989e17b35dff4928d498475841},
  intrahash = {cc72df61f4c0de369a4018ec02edffcb},
  location = {Aix-en-Provence, France},
  pages = {67--83},
  title = {XML Topic Maps and Semantic Web Mining},
  year = 2001
}

@article{MobasheretalCACM,
  author = {Mobasher, B. and Cooley, R. and Srivastava, J.},
  title = {Automatic personalization based on Web usage mining},
  journal = {Communications of the ACM},
  volume = 43,
  number = 8,
  pages = {142--151},
  year = 2000,
  location = {Santa Barbara, CA},
  interhash = {98d5090dafb39596483c75dc4a6846c3},
  intrahash = {a7a6cdb6e0790b276d7f0642991e734e}
}

@book{Handetal01,
  address = {Cambridge, MA},
  author = {Hand, D. and Mannila, H. and Smyth, P.},
  interhash = {1d7f5d27a1dfea47e1ac543fcb027c0d},
  intrahash = {e2e195a4102d09f2ed4976bf102af01d},
  publisher = {MIT Press},
  title = {Principles of Data Mining},
  year = 2001
}

@article{Chakrabarti2000,
  author = {Chakrabarti, S.},
  interhash = {e54787477738b367fad5aa57523a2cfd},
  intrahash = {31d8cbe4de9c172bf3284a29b0cf57bf},
  journal = {SIGKDD Explorations},
  location = {Santa Barbara, CA},
  number = 2,
  pages = {1--11},
  publisher = {ACM},
  title = {Data mining for hypertext: A tutorial survey},
  url = {http://citeseer.nj.nec.com/304115.html},
  volume = 1,
  year = 2000
}

@inproceedings{SrikantAgrawal95,
  author = {Srikant, R. and Agrawal, R.},
  title = {Mining Generalized Association Rules},
  booktitle = {Proceedings of the 21st International Conference on Very Large Databases},
  pages = {407--419},
  month = Sep,
  year = 1995,
  location = {Zurich, Switzerland},
  interhash = {7799834fb2c22b9467af5ac445d69036},
  intrahash = {8c68af52691fe5b3f4db26dea63c8a31}
}

@inproceedings{SPT02,
  author = {Spiliopoulou, M. and Pohle, C. and Teltzrow, M.},
  bb-note = {Sept. 9-11, 2002},
  booktitle = {Proceedings of the Multi-Konferenz Wirtschaftsinformatik},
  interhash = {9027c73d94f5a17edf0dc6ad50696a98},
  intrahash = {091d2e0d48173261edcfa0398cfc7ed2},
  location = {N{\"u}rnberg, Germany},
  month = sep,
  title = {Modelling and Mining Web Site Usage Strategies},
  year = 2002
}