% Reference database: text mining, web mining and folksonomy mining literature.
%
% Conventions used in this file
%   * One field per line; order is author/editor, title, venue, publication
%     data, identifiers, export bookkeeping, abstract.
%   * doi holds the bare DOI (no resolver prefix); page and issue ranges use a
%     double hyphen.
%   * Accented letters are written as BibTeX special characters so the file
%     sorts and labels correctly under classic BibTeX as well as Biber.
%   * Words that must keep their capitalisation in sentence-cased titles are
%     protected with braces; whole titles are never double-braced.
%   * asin, dewey, ean, interhash, intrahash, vgwort, jlprojects, bibsource and
%     affiliation are not standard fields; they are carried over unchanged from
%     the export (presumably BibSonomy -- confirm) and ignored by standard styles.

% Journal names that occur in more than one entry are defined once here.
@string{fntir     = {Foundations and Trends{\textregistered} in Information Retrieval}}
@string{sigmodrec = {SIGMOD Record}}

@book{feldman2006mining,
  author    = {Feldman, Ronen and Sanger, James},
  title     = {The Text Mining Handbook: Advanced Approaches in Analyzing Unstructured Data},
  publisher = {Cambridge University Press},
  year      = {2007},
  isbn      = {0521836573},
  url       = {http://www.amazon.com/Text-Mining-Handbook-Approaches-Unstructured/dp/0521836573/ref=sr_1_1?s=books&ie=UTF8&qid=1295265273&sr=1-1},
  asin      = {0521836573},
  dewey     = {005.74},
  ean       = {9780521836579},
  interhash = {14cb9e63c6dca7830675c73578dcc30b},
  intrahash = {a310b253098a92c9f6352f568c1a9c37},
}

@incollection{springerlink:10.1007/978-3-642-14000-6_4,
  author      = {Hotho, Andreas},
  title       = {Data Mining on Folksonomies},
  editor      = {Armano, Giuliano and de Gemmis, Marco and Semeraro, Giovanni and Vargiu, Eloisa},
  booktitle   = {Intelligent Information Access},
  series      = {Studies in Computational Intelligence},
  volume      = {301},
  pages       = {57--82},
  publisher   = {Springer},
  address     = {Berlin / Heidelberg},
  year        = {2010},
  doi         = {10.1007/978-3-642-14000-6_4},
  url         = {http://dx.doi.org/10.1007/978-3-642-14000-6_4},
  affiliation = {University of Kassel Knowledge \& Data Engineering Group 34121 Kassel Germany},
  interhash   = {8995ce2ba8835668820622f8aae908fe},
  intrahash   = {1a7906f61b76a87f618e0db657f5c6d9},
  abstract    = {Social resource sharing systems are central elements of the Web 2.0 and use all the same kind of lightweight knowledge representation, called folksonomy. As these systems are easy to use, they attract huge masses of users. Data Mining provides methods to analyze data and to learn models which can be used to support users. The application and adaptation of known data mining algorithms to folksonomies with the goal to support the users of such systems and to extract valuable information with a special focus on the Semantic Web is the main target of this paper. In this work we give a short introduction into folksonomies with a focus on our own system BibSonomy. Based on the analysis we made on a large folksonomy dataset, we present the application of data mining algorithms on three different tasks, namely spam detection, ranking and recommendation. To bridge the gap between folksonomies and the Semantic Web, we apply association rule mining to extract relations and present a deeper analysis of statistical measures which can be used to extract tag relations. This approach is complemented by presenting two approaches to extract conceptualizations from folksonomies.},
}

@article{Berendt201095,
  author    = {Berendt, Bettina and Hotho, Andreas and Stumme, Gerd},
  title     = {Bridging the Gap--Data Mining and Social Network Analysis for Integrating {Semantic Web} and {Web 2.0}},
  journal   = {Web Semantics: Science, Services and Agents on the World Wide Web},
  volume    = {8},
  number    = {2--3},
  pages     = {95--96},
  year      = {2010},
  note      = {Bridging the Gap--Data Mining and Social Network Analysis for Integrating Semantic Web and Web 2.0; The Future of Knowledge Dissemination: The Elsevier Grand Challenge for the Life Sciences},
  doi       = {10.1016/j.websem.2010.04.008},
  issn      = {1570-8268},
  url       = {http://www.sciencedirect.com/science/article/B758F-4YXK4HW-1/2/4cb514565477c54160b5e6eb716c32d7},
  interhash = {4969eb2b7bf1fabe60c5f23ab6383d77},
  intrahash = {f8d7bc2af5753906dc3897196daac18c},
}

@article{375731,
  author    = {Doan, AnHai and Domingos, Pedro and Halevy, Alon Y.},
  title     = {Reconciling schemas of disparate data sources: a machine-learning approach},
  journal   = sigmodrec,
  volume    = {30},
  number    = {2},
  pages     = {509--520},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2001},
  doi       = {10.1145/376284.375731},
  issn      = {0163-5808},
  url       = {http://portal.acm.org/citation.cfm?id=375731&dl=GUIDE&coll=GUIDE&CFID=75153142&CFTOKEN=89522229},
  interhash = {1550f1948858bf8b315ea2fc6ed789cd},
  intrahash = {29e7660361ca79b97b00e5db51fb66ee},
  abstract  = {A data-integration system provides access to a multitude of data sources through a single mediated schema. A key bottleneck in building such systems has been the laborious manual construction of semantic mappings between the source schemas and the mediated schema. We describe LSD, a system that employs and extends current machine-learning techniques to semi-automatically find such mappings. LSD first asks the user to provide the semantic mappings for a small set of data sources, then uses these mappings together with the sources to train a set of learners. Each learner exploits a different type of information either in the source schemas or in their data. Once the learners have been trained, LSD finds semantic mappings for a new data source by applying the learners, then combining their predictions using a meta-learner. To further improve matching accuracy, we extend machine learning techniques so that LSD can incorporate domain constraints as an additional source of knowledge, and develop a novel learner that utilizes the structural information in XML documents. Our approach thus is distinguished in that it incorporates multiple types of knowledge. Importantly, its architecture is extensible to additional learners that may exploit new kinds of information. We describe a set of experiments on several real-world domains, and show that LSD proposes semantic mappings with a high degree of accuracy.},
}

@article{kosala00web,
  author    = {Kosala, R. and Blockeel, H.},
  title     = {Web Mining Research: {A} Survey},
  journal   = {SIGKDD Explorations},
  volume    = {2},
  number    = {1},
  pages     = {1--15},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2000},
  url       = {http://citeseer.nj.nec.com/kosala00web.html},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  interhash = {99eea914954da48c9691277ce4e32932},
  intrahash = {59f6ef686827c7095cc89ebdb056a222},
}

@inproceedings{anti2008krause,
  author    = {Krause, Beate and Schmitz, Christoph and Hotho, Andreas and Stumme, Gerd},
  title     = {The Anti-Social Tagger -- Detecting Spam in Social Bookmarking Systems},
  booktitle = {AIRWeb '08: Proceedings of the 4th international workshop on Adversarial information retrieval on the web},
  pages     = {61--68},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Beijing, China},
  year      = {2008},
  doi       = {10.1145/1451983.1451998},
  isbn      = {978-1-60558-159-0},
  url       = {http://airweb.cse.lehigh.edu/2008/submissions/krause_2008_anti_social_tagger.pdf},
  interhash = {a45d40ac7776551301ad9dde5b25357f},
  intrahash = {68effe5d4b9460f9388e7685310f74c2},
}

@inproceedings{agrawal93,
  author    = {Agrawal, Rakesh and Imielinski, Tomasz and Swami, Arun},
  title     = {Mining association rules between sets of items in large databases},
  booktitle = {SIGMOD '93: Proceedings of the 1993 ACM SIGMOD international conference on Management of data},
  pages     = {207--216},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  year      = {1993},
  interhash = {53341ce3e6ce51c3bcf8b0219ec239b5},
  intrahash = {ca35e602124130b480592b3a55267006},
}

@book{ferber2003information,
  author    = {Ferber, Reginald},
  title     = {Information Retrieval: Suchmodelle und Data-Mining-Verfahren f{\"u}r Textsammlungen und das Web},
  publisher = {dpunkt Verlag},
  address   = {Heidelberg},
  year      = {2003},
  url       = {http://information-retrieval.de/},
  interhash = {52c1b4ab3e818efef6635eb76b778608},
  intrahash = {b60dbc902a2e19877aec154fa5747751},
}

% The export stored the page total (front matter plus body, "S." = Seiten) in
% the pages field; it is kept as pagetotal so it is not rendered as a page range.
@book{UBMA_280507895,
  author    = {Heyer, Gerhard and Quasthoff, Uwe and Wittig, Thomas},
  title     = {Text Mining: Wissensrohstoff Text},
  series    = {IT lernen},
  edition   = {1. korr. Nachdr.},
  publisher = {W3L-Verl.},
  address   = {Herdecke ; Bochum},
  year      = {2008},
  pagetotal = {XII, 348},
  isbn      = {978-3-937137-30-8},
  url       = {http://aleph.bib.uni-mannheim.de/F/?func=find-b&request=280507895&find_code=020&adjacent=N&local_base=MAN01PUBLIC&x=0&y=0},
  interhash = {d6fa152f7becd0a9d5155f748c29ac22},
  intrahash = {692999b8760981d3b2e0b9103b9d3b0f},
}

% Same work as the entry keyed Pang2008 further down (identical interhash,
% volume, issue and pages). Both keys are kept so existing citations still
% resolve; journal and author spelling are aligned between the two.
% NOTE(review): the doi is kept exactly as exported. It may belong to a
% different article of the same journal (possibly 10.1561/1500000011 is the
% right one) -- confirm against the publisher page.
@article{Pang.Lee2008OpinionMiningand,
  author     = {Pang, Bo and Lee, Lillian},
  title      = {Opinion Mining and Sentiment Analysis},
  journal    = fntir,
  volume     = {2},
  number     = {1--2},
  pages      = {1--135},
  year       = {2008},
  doi        = {10.1561/1500000001},
  jlprojects = {cyberemotions},
  interhash  = {7bfd8b20ea5f9fb76e96d71c3155c50c},
  intrahash  = {60ec6588322693f7636f8cd3bc820783},
  abstract   = {An important part of our information-gathering behavior has always been to find out what other people think. With the growing availability and popularity of opinion-rich resources such as online review sites and personal blogs, new opportunities and challenges arise as people now can, and do, actively use information technologies to seek out and understand the opinions of others. The sudden eruption of activity in the area of opinion mining and sentiment analysis, which deals with the computational treatment of opinion, sentiment, and subjectivity in text, has thus occurred at least in part as a direct response to the surge of interest in new systems that deal directly with opinions as a first-class object. This survey covers techniques and approaches that promise to directly enable opinion-oriented information-seeking systems. Our focus is on methods that seek to address the new challenges raised by sentiment-aware applications, as compared to those that are already present in more traditional fact-based analysis. We include material on summarization of evaluative text and on broader issues regarding privacy, manipulation, and economic impact that the development of opinion-oriented information-access services gives rise to. To facilitate future work, a discussion of available resources, benchmark datasets, and evaluation campaigns is also provided.},
}

% The export had the series name fused into booktitle with a bare ampersand;
% it is split into booktitle and series, and the ampersands are escaped.
% NOTE(review): the surname "Menasalvas" was "Menesalvas" in the export; the
% correction is from the reviewer's knowledge of the author -- confirm.
@incollection{1420085867,
  author    = {May, Michael and Berendt, Bettina and Cornu{\'e}jols, Antoine and Gama, Jo{\~a}o and Giannotti, Fosca and Hotho, Andreas and Malerba, Donato and Menasalvas, Ernestina and Morik, Katharina and Pedersen, Rasmus and Saitta, Lorenza and Saygin, Y{\"u}cel and Schuster, Assaf and Vanhoof, Koen},
  title     = {Research Challenges in Ubiquitous Knowledge Discovery},
  booktitle = {Next Generation of Data Mining},
  series    = {Chapman \& Hall/CRC Data Mining and Knowledge Discovery Series},
  edition   = {1},
  publisher = {Chapman \& Hall/CRC},
  year      = {2008},
  isbn      = {1420085867},
  isbn13    = {9781420085860},
  url       = {http://208.254.79.11/shopping_cart/products/product_contents.asp?id=&parent_id=497&sku=C5867&isbn=9781420085860&pc=},
  asin      = {1420085867},
  dewey     = {005.74},
  ean       = {9781420085860},
  interhash = {7aeb3b998b5918d86093e05601e81b4d},
  intrahash = {be3c753af98ab591b4f31d349513b461},
}

% arXiv preprint: the identifier lives in eprint/archiveprefix rather than in a
% printed note; the arXiv comment line from the export is kept in annote.
@misc{Medelyan2008,
  author        = {Medelyan, Olena and Legg, Catherine and Milne, David and Witten, Ian H.},
  title         = {Mining Meaning from {Wikipedia}},
  year          = {2008},
  eprint        = {0809.4530},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/0809.4530},
  annote        = {An extensive survey of re-using information in Wikipedia in natural language processing, information retrieval and extraction and ontology building. Submitted.},
  interhash     = {6614c7cd27d80abd691b2ef463941d1c},
  intrahash     = {0e7499a4f087f74ad0be674047cf315d},
  abstract      = {Wikipedia is a goldmine of information; not just for its many readers, but also for the growing community of researchers who recognize it as a resource of exceptional scale and utility. It represents a vast investment of manual effort and judgment: a huge, constantly evolving tapestry of concepts and relations that is being applied to a host of tasks. This article provides a comprehensive description of this work. It focuses on research that extracts and makes use of the concepts, relations, facts and descriptions found in Wikipedia, and organizes the work into four broad categories: applying Wikipedia to natural language processing; using it to facilitate information retrieval and information extraction; and as a resource for ontology building. The article addresses how Wikipedia is being used as is, how it is being improved and adapted, and how it is being combined with other structures to create entirely new resources. We identify the research groups and individuals involved, and how their work has developed in the last few years. We provide a comprehensive list of the open-source software they have produced. We also discuss the implications of this work for the long-awaited semantic web.},
}

@article{1324190,
  author    = {Stavrianou, Anna and Andritsos, Periklis and Nicoloyannis, Nicolas},
  title     = {Overview and semantic issues of text mining},
  journal   = sigmodrec,
  volume    = {36},
  number    = {3},
  pages     = {23--34},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2007},
  doi       = {10.1145/1324185.1324190},
  issn      = {0163-5808},
  url       = {http://portal.acm.org/citation.cfm?id=1324190},
  interhash = {bde58d2eeb65f2194171f93b0e1f2a21},
  intrahash = {d8c54095392c0e83ab4f50f694d3b1f3},
}

% Second key for the Pang and Lee survey (see Pang.Lee2008OpinionMiningand).
% The free-text date "July 2008" is expressed as month + year, and the
% publisher name, exported under the unknown field "tech", is in publisher.
@article{Pang2008,
  author    = {Pang, Bo and Lee, Lillian},
  title     = {Opinion mining and sentiment analysis},
  journal   = fntir,
  volume    = {2},
  number    = {1--2},
  pages     = {1--135},
  publisher = {Now publishers},
  month     = jul,
  year      = {2008},
  isbn      = {978-1-60198-150-9},
  url       = {http://www.cs.cornell.edu/home/llee/omsa/omsa-published.pdf},
  interhash = {7bfd8b20ea5f9fb76e96d71c3155c50c},
  intrahash = {236d4f703fda3dd9457863f28eda56cb},
}

@inproceedings{Chakrabarti:2004,
  author    = {Chakrabarti, D. and Zhan, Y. and Faloutsos, C.},
  title     = {{R-MAT}: A Recursive Model for Graph Mining},
  booktitle = {SIAM International Conference on Data Mining},
  year      = {2004},
  url       = {http://www.cs.cmu.edu/~christos/PUBLICATIONS/siam04.pdf},
  interhash = {d7719c6e919fbb8a37e09464f12988b6},
  intrahash = {5e5cc221d7da719909f3bf8c507b0afc},
}

% The export had "Springer, Berlin" in publisher; name and place are split.
@book{0387954333,
  author    = {Weiss, Sholom M. and Indurkhya, Nitin and Zhang, T.},
  title     = {Text Mining. Predictive Methods for Analyzing Unstructured Information},
  edition   = {1},
  publisher = {Springer},
  address   = {Berlin},
  year      = {2004},
  isbn      = {0387954333},
  url       = {http://www.amazon.de/gp/redirect.html%3FASIN=0387954333%26tag=ws%26lcode=xm2%26cID=2025%26ccmID=165953%26location=/o/ASIN/0387954333%253FSubscriptionId=13CT5CVB80YFWJEPWS02},
  asin      = {0387954333},
  dewey     = {006.312},
  ean       = {9780387954332},
  interhash = {d75b9da07cf40d54a79e6d8995f78a31},
  intrahash = {6ac07561b543e6033fd4c9811d0dccad},
}

@article{hotho2007mining,
  author    = {Hotho, Andreas and Stumme, Gerd},
  title     = {Mining the {World Wide Web}},
  journal   = {K{\"u}nstliche Intelligenz},
  number    = {3},
  pages     = {5--8},
  year      = {2007},
  url       = {http://kobra.bibliothek.uni-kassel.de/bitstream/urn:nbn:de:hebis:34-2008021320337/3/HothoStummeMiningWWW.pdf},
  vgwort    = {20},
  interhash = {26915a205b66368931821165ecaf972c},
  intrahash = {92d3a5fdd786086fa12787e3e350b6af},
}

% The export carried the workshop day as free text in date ("September 18,
% 2006 Series:"); it is kept as an ISO eventdate, which matches the abstract,
% while year stays the publication year.
@book{Berendt2007,
  editor    = {Berendt, B. and Hotho, A. and Mladenic, D. and Semeraro, G.},
  title     = {From Web to Social Web: Discovering and Deploying User and Content Profiles},
  series    = {LNCS},
  volume    = {4736},
  publisher = {Springer},
  location  = {Berlin, Germany},
  year      = {2007},
  eventdate = {2006-09-18},
  isbn      = {978-3-540-74950-9},
  url       = {http://www.springer.com/dal/home?SGWID=1-102-22-173759307-0&changeHeader=true&referer=www.springeronline.com&SHORTCUT=www.springer.com/978-3-540-74950-9},
  vgwort    = {279},
  interhash = {44843d4fe175e66198b58137dd924f44},
  intrahash = {8aa8d9bcbb5a5bb3fc480d1e53b27236},
  abstract  = {This book constitutes the refereed proceedings of the Workshop on Web Mining, WebMine 2006, held in Berlin, Germany, September 18th, 2006. Topics included are data mining based on analysis of bloggers and tagging, web mining, XML mining and further techniques of knowledge discovery. The book is especially valuable for those interested in the aspects of the Social Web (Web 2.0) and its inherent dynamic and diversity of user-generated content.},
}

@article{1276056,
  author    = {Wurst, Michael and Morik, Katharina},
  title     = {Distributed feature extraction in a p2p setting: a case study},
  journal   = {Future Generation Computer Systems},
  volume    = {23},
  number    = {1},
  pages     = {69--75},
  publisher = {Elsevier Science Publishers B. V.},
  address   = {Amsterdam, The Netherlands},
  year      = {2007},
  doi       = {10.1016/j.future.2006.04.004},
  issn      = {0167-739X},
  url       = {http://portal.acm.org/citation.cfm?id=1276056},
  interhash = {591e4b107d4a5cafc0cf49e07fad72e0},
  intrahash = {e5eba80e58b4532a3fd3bcf50994734e},
}

@inproceedings{hotho02textws,
  author    = {Hotho, Andreas and Maedche, Alexander and Staab, Steffen and Zacharias, Valentin},
  title     = {On Knowledgeable Unsupervised Text Mining},
  booktitle = {Proc. of Text Mining Workshop},
  year      = {2002},
  url       = {http://www.aifb.uni-karlsruhe.de/WBS/aho/pub/txt_mining_ws_2002.pdf},
  interhash = {a8e24d64d26ca5b681a2c29e005cbcc2},
  intrahash = {56ae0afc93999014629b06cc958f6a04},
}