% Bibliography entries, laid out one field per line.
% Text outside of entries (such as these notes) is ignored by BibTeX.

@incollection{agrawal2014mining,
  abstract = {We propose a system for mining videos from the web for supplementing the content of electronic textbooks in order to enhance their utility. Textbooks are generally organized into sections such that each section explains very few concepts and every concept is primarily explained in one section. Building upon these principles from the education literature and drawing upon the theory of },
  author = {Agrawal, Rakesh and Christoforaki, Maria and Gollapudi, Sreenivas and Kannan, Anitha and Kenthapadi, Krishnaram and Swaminathan, Adith},
  booktitle = {Formal Concept Analysis},
  doi = {10.1007/978-3-319-07248-7_16},
  editor = {Glodeanu, Cynthia Vera and Kaytoue, Mehdi and Sacarea, Christian},
  interhash = {fc34406f5a91a9561ba0e12b98830f28},
  intrahash = {76ac675e26647d14199da79c3467bc90},
  isbn = {978-3-319-07247-0},
  language = {English},
  pages = {219--234},
  publisher = {Springer International Publishing},
  series = {Lecture Notes in Computer Science},
  title = {Mining Videos from the Web for Electronic Textbooks},
  url = {http://dx.doi.org/10.1007/978-3-319-07248-7_16},
  volume = 8478,
  year = 2014
}

@proceedings{cellier2014proceedings,
  bibsource = {dblp computer science bibliography, http://dblp.org},
  editor = {Cellier, Peggy and Charnois, Thierry and Hotho, Andreas and Matwin, Stan and Moens, Marie{-}Francine and Toussaint, Yannick},
  interhash = {212d282598a034c37510c1c08c4f3a34},
  intrahash = {cfb7265080d484cfda32e1fbdaff361f},
  publisher = {CEUR-WS.org},
  series = {{CEUR} Workshop Proceedings},
  title = {Proceedings of the 1st International Workshop on Interactions between Data Mining and Natural Language Processing co-located with The European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, DMNLP@PKDD/ECML 2014, Nancy, France, September 15, 2014},
  url = {http://ceur-ws.org/Vol-1202},
  volume = 1202,
  year = 2014
}

@article{atzmueller2014ubicon,
  abstract = {The combination of ubiquitous and social computing is an emerging research area which integrates different but complementary methods, techniques and tools. In this paper, we focus on the Ubicon platform, its applications, and a large spectrum of analysis results. Ubicon provides an extensible framework for building and hosting applications targeting both ubiquitous and social environments. We summarize the architecture and exemplify its implementation using four real-world applications built on top of Ubicon. In addition, we discuss several scientific experiments in the context of these applications in order to give a better picture of the potential of the framework, and discuss analysis results using several real-world data sets collected utilizing Ubicon.},
  author = {Atzmueller, Martin and Becker, Martin and Kibanov, Mark and Scholz, Christoph and Doerfel, Stephan and Hotho, Andreas and Macek, Bjoern-Elmar and Mitzlaff, Folke and Mueller, Juergen and Stumme, Gerd},
  doi = {10.1080/13614568.2013.873488},
  interhash = {6364e034fa868644b30618dc887c0270},
  intrahash = {176e4f2816af5fe1630ed65e062900ce},
  journal = {New Review of Hypermedia and Multimedia},
  number = 1,
  pages = {53--77},
  title = {{Ubicon and its Applications for Ubiquitous Social Computing}},
  url = {http://www.tandfonline.com/doi/abs/10.1080/13614568.2013.873488},
  volume = 20,
  year = 2014
}

@article{jiang2013understanding,
  abstract = {Popular online social networks (OSNs) like Facebook and Twitter are changing the way users communicate and interact with the Internet. A deep understanding of user interactions in OSNs can provide important insights into questions of human social behavior and into the design of social platforms and applications. However, recent studies have shown that a majority of user interactions on OSNs are latent interactions, that is, passive actions, such as profile browsing, that cannot be observed by traditional measurement techniques. In this article, we seek a deeper understanding of both active and latent user interactions in OSNs. For quantifiable data on latent user interactions, we perform a detailed measurement study on Renren, the largest OSN in China with more than 220 million users to date. All friendship links in Renren are public, allowing us to exhaustively crawl a connected graph component of 42 million users and 1.66 billion social links in 2009. Renren also keeps detailed, publicly viewable visitor logs for each user profile. We capture detailed histories of profile visits over a period of 90 days for users in the Peking University Renren network and use statistics of profile visits to study issues of user profile popularity, reciprocity of profile visits, and the impact of content updates on user popularity. We find that latent interactions are much more prevalent and frequent than active events, are nonreciprocal in nature, and that profile popularity is correlated with page views of content rather than with quantity of content updates. Finally, we construct latent interaction graphs as models of user browsing behavior and compare their structural properties, evolution, community structure, and mixing times against those of both active interaction graphs and social graphs.},
  acmid = {2517040},
  address = {New York, NY, USA},
  articleno = {18},
  author = {Jiang, Jing and Wilson, Christo and Wang, Xiao and Sha, Wenpeng and Huang, Peng and Dai, Yafei and Zhao, Ben Y.},
  doi = {10.1145/2517040},
  interhash = {af18171c38a0b07fce62fb3fac5c6322},
  intrahash = {aa9695f56135fd58de32b5b4a4c73698},
  issn = {1559-1131},
  issue_date = {October 2013},
  journal = {ACM Transactions on the Web},
  month = nov,
  number = 4,
  numpages = {39},
  pages = {18:1--18:39},
  publisher = {ACM},
  title = {Understanding Latent Interactions in Online Social Networks},
  url = {http://doi.acm.org/10.1145/2517040},
  volume = 7,
  year = 2013
}

% NOTE(review): the key "noKey" is a placeholder; it is kept unchanged so that
% existing citations keep resolving.
@incollection{noKey,
  abstract = {Formal Concept Analysis (FCA) is an unsupervised clustering technique and many scientific papers are devoted to applying FCA in Information Retrieval (IR) research. We collected 103 papers published between 2003-2009 which mention FCA and information retrieval in the abstract, title or keywords. Using a prototype of our FCA-based toolset CORDIET, we converted the pdf-files containing the papers to plain text, indexed them with Lucene using a thesaurus containing terms related to FCA research and then created the concept lattice shown in this paper. We visualized, analyzed and explored the literature with concept lattices and discovered multiple interesting research streams in IR of which we give an extensive overview. The core contributions of this paper are the innovative application of FCA to the text mining of scientific papers and the survey of the FCA-based IR research.},
  author = {Poelmans, Jonas and Ignatov, Dmitry I. and Viaene, Stijn and Dedene, Guido and Kuznetsov, Sergei O.},
  booktitle = {Advances in Data Mining. Applications and Theoretical Aspects},
  doi = {10.1007/978-3-642-31488-9_22},
  editor = {Perner, Petra},
  interhash = {04d543b37049a90f2cbd796585bda214},
  intrahash = {f6eddba1f2c6b7cdbfa67a0c79ae5ae8},
  isbn = {978-3-642-31487-2},
  language = {English},
  pages = {273--287},
  publisher = {Springer Berlin Heidelberg},
  series = {Lecture Notes in Computer Science},
  title = {Text Mining Scientific Papers: A Survey on FCA-Based Information Retrieval Research},
  url = {http://dx.doi.org/10.1007/978-3-642-31488-9_22},
  volume = 7377,
  year = 2012
}

% NOTE(review): MASH:13b and mitzlaff2011semantics (further below) describe the
% same work; both keys are kept so that existing citations keep resolving.
% The year was 2011, which contradicted the booktitle (LWA 2013); set to 2013.
@inproceedings{MASH:13b,
  address = {Bamberg, Germany},
  author = {Mitzlaff, Folke and Atzmueller, Martin and Stumme, Gerd and Hotho, Andreas},
  booktitle = {Proc. LWA 2013 (KDML Special Track)},
  interhash = {73088600a500f7d06768615d6e1c2b3d},
  intrahash = {820ffb2166b330bf60bb30b16e426553},
  publisher = {University of Bamberg},
  title = {{On the Semantics of User Interaction in Social Media (Extended Abstract, Resubmission)}},
  year = 2013
}

@article{atzmueller2013exploratory,
  author = {Atzmueller, Martin and Lemmerich, Florian},
  interhash = {6e83bf4017fffe31a5632289d91c1b6d},
  intrahash = {9f176520035c05191d77ebd53803b817},
  journal = {International Journal of Web Science (Special Issue on Social Web Search and Mining)},
  number = {1/2},
  title = {{Exploratory Pattern Mining on Social Media using Geo-References and Social Tagging Information}},
  volume = 2,
  year = 2013
}

% Same work as MASH:13b above; the year was 2011, which contradicted the
% booktitle (LWA 2013), and is set to 2013 here as well.
@inproceedings{mitzlaff2011semantics,
  address = {Bamberg, Germany},
  author = {Mitzlaff, Folke and Atzmueller, Martin and Stumme, Gerd and Hotho, Andreas},
  booktitle = {Proc. LWA 2013 (KDML Special Track)},
  interhash = {73088600a500f7d06768615d6e1c2b3d},
  intrahash = {820ffb2166b330bf60bb30b16e426553},
  publisher = {University of Bamberg},
  title = {{On the Semantics of User Interaction in Social Media (Extended Abstract, Resubmission)}},
  year = 2013
}

@inproceedings{kibanov2013evolution,
  address = {Boston, MA, USA},
  author = {Kibanov, Mark and Atzmueller, Martin and Scholz, Christoph and Stumme, Gerd},
  booktitle = {Proc. IEEE CPSCom 2013},
  interhash = {14e73d96c8554e73214c36b49add934c},
  intrahash = {5824b6151b2046d6295e4300311b7e8e},
  publisher = {IEEE Computer Society},
  title = {{On the Evolution of Contacts and Communities in Networks of Face-to-Face Proximity}},
  year = 2013
}

@inproceedings{scholz2013people,
  address = {New York, NY, USA},
  author = {Scholz, Christoph and Atzmueller, Martin and Kibanov, Mark and Stumme, Gerd},
  booktitle = {Proc. ASONAM 2013},
  interhash = {8b6051b794789000c4baa5ab059fab18},
  intrahash = {0fee74fd5b3f4c2af09e3af256086c63},
  publisher = {ACM Press},
  title = {{How Do People Link? Analysis of Contact Structures in Human Face-to-Face Proximity Networks}},
  year = 2013
}

@incollection{seipel2013mining,
  address = {Heidelberg, Germany},
  author = {Seipel, Dietmar and K{\"o}hler, Stefan and Neubeck, Philipp and Atzmueller, Martin},
  booktitle = {{Postproceedings of the 1st Workshop on New Frontiers in Mining Complex Patterns (NFMCP 2012)}},
  interhash = {5c349ddd3d5627b0ccb31ee1de55faa2},
  intrahash = {0c1b2ffe552535d3f34521a74f4c4253},
  publisher = {Springer Verlag},
  series = {LNAI},
  title = {{Mining Complex Event Patterns in Computer Networks}},
  year = 2013
}

% arXiv preprint: the identifier is stored in eprint/archiveprefix; journal and
% volume follow the "CoRR" + "abs/<id>" convention instead of "CoRR/abs" + "<id>".
@article{mitzlaff2013userrelatedness,
  archiveprefix = {arXiv},
  author = {Mitzlaff, Folke and Atzmueller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  eprint = {1309.3888},
  interhash = {40aa075d925f2e6e009986fd9e60b11b},
  intrahash = {424d0f2d4a5c9a0eb68cbf2fc5b0010a},
  journal = {CoRR},
  title = {{User-Relatedness and Community Structure in Social Interaction Networks}},
  volume = {abs/1309.3888},
  year = 2013
}

% This entry previously reused the key kibanov2013evolution (the IEEE CPSCom
% paper above), which BibTeX rejects as a repeated entry. It is a different
% publication (LWA 2013 resubmission), so it gets its own key.
@inproceedings{kibanov2013evolutionlwa,
  address = {Bamberg, Germany},
  author = {Kibanov, Mark and Atzmueller, Martin and Scholz, Christoph and Stumme, Gerd},
  booktitle = {Proc. LWA 2013 (KDML Special Track)},
  interhash = {84a970ee998f7e8d24b597c285e1887c},
  intrahash = {a5d94c9930fd413a404efe6d9420a752},
  publisher = {University of Bamberg},
  title = {{Evolution of Contacts and Communities in Networks of Face-to-Face Proximity (Extended Abstract, Resubmission)}},
  year = 2013
}

@inproceedings{atzmueller2013subgroup,
  address = {Prague, Czech Republic},
  author = {Atzmueller, Martin and Mueller, Juergen},
  booktitle = {{Proceedings of the International Workshop on Mining Ubiquitous and Social Environments (MUSE2013)}},
  interhash = {339aa18b5a6047ea4005f6c6a48b9bb6},
  intrahash = {56661bf6212e1e5ab839640fdfba09b6},
  title = {{Subgroup Analytics and Interactive Assessment on Ubiquitous Data}},
  year = 2013
}

@misc{atzmueller2013conferator,
  author = {Atzmueller, Martin and Kibanov, Mark and Scholz, Christoph and Stumme, Gerd},
  howpublished = {Poster at INFORMATIK 2013},
  institution = {University of Koblenz-Landau},
  interhash = {caba49a0a623346dad82849aba632778},
  intrahash = {2668432d7c035c34c4af4f6f96e22397},
  publisher = {INFORMATIK 2013},
  title = {{Conferator - a Social System for Conference and Contact Management}},
  year = 2013
}

@book{rothberghofer2013proceedings,
  address = {Annecy, France},
  editor = {Roth-Berghofer, Thomas and Oussena, Samia and Atzmueller, Martin},
  interhash = {f1f8980780e57fc36c9c44ce7d3673b4},
  intrahash = {2b36fe460601217691180f0affbbe286},
  publisher = {CONTEXT 2013},
  title = {{Proceedings of the 2013 International Smart University Workshop (SmartU 2013)}},
  year = 2013
}

% The doi below is the one percent-encoded inside the url field.
% NOTE(review): the journal/number pair names a book series and what is
% presumably its volume number; this may be better modelled as an
% incollection -- confirm against the publisher record.
@article{hotho2010ubiquitous,
  author = {Hotho, Andreas and {Ulslev Pedersen}, Rasmus and Wurst, Michael},
  doi = {10.1007/978-3-642-16392-0_4},
  interhash = {e779fb5dff41b65bce1aa38fdca4a376},
  intrahash = {56f2940d5d0f2ce59c342d3b8ad42ca1},
  issn = {0302-9743},
  journal = {Lecture Notes in Computer Science},
  number = 6202,
  pages = {61--74},
  publisher = {Springer},
  title = {Ubiquitous Data},
  url = {http://rd.springer.com/content/pdf/10.1007%2F978-3-642-16392-0_4.pdf},
  year = 2010
}
% Bibliography entries, laid out one field per line.
% Text outside of entries (such as these notes) is ignored by BibTeX.

% Entry type changed from the non-standard "electronic" to "book": the record
% carries isbn, publisher and address. The edition was moved out of the title.
% NOTE(review): han2012mining and han2011mining share ISBN 0123814790 and look
% like the same book; both keys are kept so existing citations keep resolving.
@book{han2012mining,
  address = {Waltham, Mass.},
  author = {Han, Jiawei and Kamber, Micheline and Pei, Jian},
  edition = {Third},
  interhash = {247a70f1f22ce1914e46d7ff6f43e378},
  intrahash = {beb274b9aeaebb87f5423781b6839f54},
  isbn = {0123814790},
  publisher = {Morgan Kaufmann Publishers},
  refid = {818321921},
  title = {Data Mining: Concepts and Techniques},
  url = {http://www.amazon.de/Data-Mining-Concepts-Techniques-Management/dp/0123814790/ref=tmm_hrd_title_0?ie=UTF8&qid=1366039033&sr=1-1},
  year = 2012
}

% NOTE(review): the product id in this url (0123748569) differs from the isbn
% field; the link may point to a different book -- verify.
@book{han2011mining,
  address = {Amsterdam [u.a.]},
  author = {Han, Jiawei and Kamber, Micheline},
  interhash = {c94d7099d50ba439ee4579de99af285c},
  intrahash = {1cf9e5362e9f34194abad46a4ff9c771},
  isbn = {9780123814791 0123814790},
  publisher = {Elsevier/Morgan Kaufmann},
  refid = {734060711},
  title = {Data Mining: Concepts and Techniques},
  url = {http://www.amazon.de/Data-Mining-Practical-Techniques-Management/dp/0123748569/ref=sr_1_2?ie=UTF8&qid=1366038862&sr=8-2&keywords=Data+mining},
  year = 2011
}

% NOTE(review): no publisher field is present, which the book type requires
% under the standard styles; add it once known.
@book{north2012mining,
  author = {North, Matthew},
  interhash = {c2a4d59bda60a400d2f1d45eefe68c93},
  intrahash = {b382402685b65047b826d48260dca7d6},
  isbn = {9780615684376 0615684378},
  refid = {814299849},
  title = {Data Mining for the Masses},
  url = {http://www.amazon.de/Data-Mining-Masses-Matthew-North/dp/0615684378/ref=sr_1_1?s=books-intl-de&ie=UTF8&qid=1366038800&sr=1-1&keywords=rapidminer},
  year = 2012
}

% The placeholder issue number 0 from the original record was dropped.
@article{strohmaier2012understanding,
  abstract = {While recent progress has been achieved in understanding the structure and dynamics of social tagging systems, we know little about the underlying user motivations for tagging, and how they influence resulting folksonomies and tags. This paper addresses three issues related to this question. (1) What distinctions of user motivations are identified by previous research, and in what ways are the motivations of users amenable to quantitative analysis? (2) To what extent does tagging motivation vary across different social tagging systems? (3) How does variability in user motivation influence resulting tags and folksonomies? In this paper, we present measures to detect whether a tagger is primarily motivated by categorizing or describing resources, and apply these measures to datasets from seven different tagging systems. Our results show that (a) users’ motivation for tagging varies not only across, but also within tagging systems, and that (b) tag agreement among users who are motivated by categorizing resources is significantly lower than among users who are motivated by describing resources. Our findings are relevant for (1) the development of tag-based user interfaces, (2) the analysis of tag semantics and (3) the design of search algorithms for social tagging systems.},
  author = {Strohmaier, Markus and K{\"o}rner, Christian and Kern, Roman},
  doi = {10.1016/j.websem.2012.09.003},
  interhash = {0b972aa7d8892d70761ba3ba11a737f6},
  intrahash = {5c063dc162f38895336d2775507132ee},
  issn = {1570-8268},
  journal = {Web Semantics: Science, Services and Agents on the World Wide Web},
  pages = {1--11},
  title = {Understanding why users tag: A survey of tagging motivation literature and results from an empirical study},
  url = {http://www.sciencedirect.com/science/article/pii/S1570826812000820},
  volume = 17,
  year = 2012
}

@inproceedings{carman2009statistical,
  abstract = {We investigate tag and query logs to see if the terms people use to annotate websites are similar to the ones they use to query for them. Over a set of URLs, we compare the distribution of tags used to annotate each URL with the distribution of query terms for clicks on the same URL. Understanding the relationship between the distributions is important to determine how useful tag data may be for improving search results and conversely, query data for improving tag prediction. In our study, we compare both term frequency distributions using vocabulary overlap and relative entropy. We also test statistically whether the term counts come from the same underlying distribution. Our results indicate that the vocabulary used for tagging and searching for content are similar but not identical. We further investigate the content of the websites to see which of the two distributions (tag or query) is most similar to the content of the annotated/searched URL. Finally, we analyze the similarity for different categories of URLs in our sample to see if the similarity between distributions is dependent on the topic of the website or the popularity of the URL.},
  acmid = {1571965},
  address = {New York, NY, USA},
  author = {Carman, Mark J. and Baillie, Mark and Gwadera, Robert and Crestani, Fabio},
  booktitle = {Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval},
  doi = {10.1145/1571941.1571965},
  interhash = {d023f082cc783251a90a2f71c71826eb},
  intrahash = {d3e4319a20670f7f73bdf83b63bdf4c7},
  isbn = {978-1-60558-483-6},
  location = {Boston, MA, USA},
  numpages = {8},
  pages = {123--130},
  publisher = {ACM},
  series = {SIGIR '09},
  title = {A statistical comparison of tag and query logs},
  url = {http://doi.acm.org/10.1145/1571941.1571965},
  year = 2009
}

@article{agosti2012analysis,
  abstract = {In the last decade, the importance of analyzing information management systems logs has grown, because log data constitute a relevant aspect in evaluating the quality of such systems. A review of 10 years of research on log analysis is presented in this paper. About 50 papers and posters from five major conferences and about 30 related journal papers have been selected to trace the history of the state-of-the-art in this field. The paper presents an overview of two main themes: Web search engine log analysis and Digital Library System log analysis. The problem of the analysis of different sources of log data and the distribution of data are investigated.},
  author = {Agosti, Maristella and Crivellari, Franco and Di Nunzio, Giorgio Maria},
  doi = {10.1007/s10618-011-0228-8},
  interhash = {b3a3325250a6df194ab28f2e1f39c8b3},
  intrahash = {9b85b7d3c5587c5f0920f0d602ba93b1},
  issn = {1384-5810},
  journal = {Data Mining and Knowledge Discovery},
  language = {English},
  number = 3,
  pages = {663--696},
  publisher = {Springer US},
  title = {Web log analysis: a review of a decade of studies about information acquisition, inspection and interpretation of user interaction},
  url = {http://dx.doi.org/10.1007/s10618-011-0228-8},
  volume = 24,
  year = 2012
}

@article{park2005searching,
  abstract = {Transaction logs of NAVER, a major Korean Web search engine, were analyzed to track the information-seeking behavior of Korean Web users. These transaction logs include more than 40 million queries collected over 1 week. This study examines current transaction log analysis methodologies and proposes a method for log cleaning, session definition, and query classification. A term definition method which is necessary for Korean transaction log analysis is also discussed. The results of this study show that users behave in a simple way: they type in short queries with a few query terms, seldom use advanced features, and view few results' pages. Users also behave in a passive way: they seldom change search environments set by the system. It is of interest that users tend to change their queries totally rather than adding or deleting terms to modify the previous queries. The results of this study might contribute to the development of more efficient and effective Web search engines and services.},
  author = {Park, Soyeon and Lee, Joon Ho and Bae, Hee Jin},
  doi = {10.1016/j.lisr.2005.01.013},
  interhash = {143140b40c2a30f960938f40e8a3b141},
  intrahash = {fef68b6d2a607ef462592dd73295328d},
  issn = {0740-8188},
  journal = {Library \& Information Science Research},
  number = 2,
  pages = {203--221},
  title = {End user searching: A Web log analysis of NAVER, a Korean Web search engine},
  url = {http://www.sciencedirect.com/science/article/pii/S0740818805000083},
  volume = 27,
  year = 2005
}

@article{huntington2008website,
  abstract = {Metrics derived from user visits or sessions provide a means of evaluating Websites and an important insight into online information seeking behaviour, the most important of them being the duration of sessions and the number of pages viewed in a session, a possible busyness indicator. However, the identification of session (termed often ‘sessionization’) is fraught with difficulty in that there is no way of determining from a transactional log file that a user has ended their session. No one logs out. Instead a session delimiter has to be applied and this is typically done on the basis of a standard period of inactivity. To date researchers have discussed the issue of a time out delimiter in terms of a single value and if a page view time exceeds the cut-off value the session is deemed to have ended. This approach assumes that page view time is a single distribution and that the cut-off value is one point on that distribution. The authors however argue that page time distribution is composed of a number of quite separate view time distributions because of the marked differences in view times between pages (abstract, contents page, full text). This implies that a number of timeout delimiters should be applied. Employing data from a study of the OhioLINK digital journal library, the authors demonstrate how the setting of a time out delimiter impacts on the estimate of page view time and the number of estimated session. Furthermore, they also show how a number of timeout delimiters might apply and they argue that this gives a better and more robust estimate of the number of sessions, session time and page view time compared to an application of a single timeout delimiter.},
  author = {Huntington, Paul and Nicholas, David and Jamali, Hamid R.},
  doi = {10.1016/j.ipm.2007.03.003},
  interhash = {ba30ef785efd6a424b6e05b94cadb536},
  intrahash = {565f363f36a9a0a14c7ac44824ec91ad},
  issn = {0306-4573},
  journal = {Information Processing \& Management},
  note = {Evaluation of Interactive Information Retrieval Systems},
  number = 1,
  pages = {358--372},
  title = {Website usage metrics: A re-assessment of session data},
  url = {http://www.sciencedirect.com/science/article/pii/S0306457307000817},
  volume = 44,
  year = 2008
}

@article{nicholas2007diversity,
  abstract = {The logs of four universities using the OhioLINK journal system were evaluated for a period of fifteen months using deep log analysis methods in order to compare and contrast the information seeking behaviour of their users. Large differences were found, especially between the research and teaching universities. Methodological problems associated with making the comparisons are discussed in some detail especially in terms of defining online sessions.},
  author = {Nicholas, David and Huntington, Paul and Jamali, Hamid R.},
  doi = {10.1016/j.acalib.2007.09.001},
  interhash = {c9f65d90c47bbd601ae18bba74e329a8},
  intrahash = {f4c269e2086b8624c1c0c59ed075d677},
  issn = {0099-1333},
  journal = {The Journal of Academic Librarianship},
  number = 6,
  pages = {629--638},
  title = {Diversity in the Information Seeking Behaviour of the Virtual Scholar: Institutional Comparisons},
  url = {http://www.sciencedirect.com/science/article/pii/S0099133307001759},
  volume = 33,
  year = 2007
}

% NOTE(review): volume 9 of this journal presumably corresponds to 2009 rather
% than 2005 -- verify the year against the publisher record.
@article{suneetha2005identifying,
  author = {Suneetha, K. R. and Krishnamoorthy, K. R.},
  interhash = {3df19221981310e32b8325c9226d03cd},
  intrahash = {409a7c28e5ab6edebfaccb4d2f4c1503},
  journal = {International Journal of Computer Science and Network Security},
  number = 4,
  pages = {327--332},
  title = {Identifying User Behavior by Analyzing Web Server Access Log File},
  volume = 9,
  year = 2005
}

@article{ivncsy2006frequent,
  abstract = {Abstract: Frequent pattern mining is a heavily researched area in the field of data mining with wide range of applications. One of them is to use frequent pattern discovery methods in Web log data. Discovering hidden information from Web log data is called Web usage mining. The aim of discovering frequent patterns in Web log data is to obtain information about the navigational behavior of the users. This can be used for advertising purposes, for creating dynamic user profiles etc. In this paper three pattern mining approaches are investigated from the Web usage mining point of view. The different patterns in Web log mining are page sets, page sequences and page graphs.},
  author = {Iv{\'a}ncsy, Ren{\'a}ta and Vajk, Istv{\'a}n},
  interhash = {5612ed1c8203908fb94adf7ad8304e12},
  intrahash = {f29f4627c9ae99370fc7ba005982e2e6},
  journal = {Acta Polytechnica Hungarica},
  number = 1,
  title = {Frequent Pattern Mining in Web Log Data},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.101.4559},
  volume = 3,
  year = 2006
}

@inproceedings{rose2004understanding,
  abstract = {Previous work on understanding user web search behavior has focused on how people search and what they are searching for, but not why they are searching. In this paper, we describe a framework for understanding the underlying goals of user searches, and our experience in using the framework to manually classify queries from a web search engine. Our analysis suggests that so-called "navigational" searches are less prevalent than generally believed while a previously unexplored "resource-seeking" goal may account for a large fraction of web searches. We also illustrate how this knowledge of user search goals might be used to improve future web search engines.},
  acmid = {988675},
  address = {New York, NY, USA},
  author = {Rose, Daniel E. and Levinson, Danny},
  booktitle = {Proceedings of the 13th international conference on World Wide Web},
  doi = {10.1145/988672.988675},
  interhash = {684bf3f0c2e82239d3b2f932aa9a5ef4},
  intrahash = {527fa40ab61aa9987608eed21e3d43eb},
  isbn = {1-58113-844-X},
  location = {New York, NY, USA},
  numpages = {7},
  pages = {13--19},
  publisher = {ACM},
  series = {WWW '04},
  title = {Understanding user goals in web search},
  url = {http://doi.acm.org/10.1145/988672.988675},
  year = 2004
}

@incollection{baglioni2003preprocessing,
  abstract = {We describe the web usage mining activities of an on-going project, called ClickWorld, that aims at extracting models of the navigational behaviour of a web site users. The models are inferred from the access logs of a web server by means of data and web mining techniques. The extracted knowledge is deployed to the purpose of offering a personalized and proactive view of the web services to users. We first describe the preprocessing steps on access logs necessary to clean, select and prepare data for knowledge extraction. Then we show two sets of experiments: the first one tries to predict the sex of a user based on the visited web pages, and the second one tries to predict whether a user might be interested in visiting a section of the site.},
  author = {Baglioni, M. and Ferrara, U. and Romei, A. and Ruggieri, S. and Turini, F.},
  booktitle = {AI*IA 2003: Advances in Artificial Intelligence},
  doi = {10.1007/978-3-540-39853-0_20},
  editor = {Cappelli, Amedeo and Turini, Franco},
  interhash = {8dc29cedada5b8e9b3571ae77b983e2f},
  intrahash = {1607f6c312b64832bab33fa843442d5e},
  isbn = {978-3-540-20119-9},
  pages = {237--249},
  publisher = {Springer Berlin Heidelberg},
  series = {Lecture Notes in Computer Science},
  title = {Preprocessing and Mining Web Log Data for Web Personalization},
  url = {http://dx.doi.org/10.1007/978-3-540-39853-0_20},
  volume = 2829,
  year = 2003
}

@article{jansen2006search,
  abstract = {The use of data stored in transaction logs of Web search engines, Intranets, and Web sites can provide valuable insight into understanding the information-searching process of online searchers. This understanding can enlighten information system design, interface development, and devising the information architecture for content collections. This article presents a review and foundation for conducting Web search transaction log analysis. A methodology is outlined consisting of three stages, which are collection, preparation, and analysis. The three stages of the methodology are presented in detail with discussions of goals, metrics, and processes at each stage. Critical terms in transaction log analysis for Web searching are defined. The strengths and limitations of transaction log analysis as a research method are presented. An application to log client-side interactions that supplements transaction logs is reported on, and the application is made available for use by the research community. Suggestions are provided on ways to leverage the strengths of, while addressing the limitations of, transaction log analysis for Web-searching research. Finally, a complete flat text transaction log from a commercial search engine is available as supplementary material with this manuscript.},
  author = {Jansen, Bernard J.},
  doi = {10.1016/j.lisr.2006.06.005},
  interhash = {0488e60c424ea821ee7b3e3760ffd115},
  intrahash = {e147f866b624d461c77a24b79b2d9aff},
  issn = {0740-8188},
  journal = {Library \& Information Science Research},
  number = 3,
  pages = {407--432},
  title = {Search log analysis: What it is, what's been done, how to do it},
  url = {http://www.sciencedirect.com/science/article/pii/S0740818806000673},
  volume = 28,
  year = 2006
}

@article{silverstein1999analysis,
  abstract = {In this paper we present an analysis of an AltaVista Search Engine query log consisting of approximately 1 billion entries for search requests over a period of six weeks. This represents almost 285 million user sessions, each an attempt to fill a single information need. We present an analysis of individual queries, query duplication, and query sessions. We also present results of a correlation analysis of the log entries, studying the interaction of terms within queries. Our data supports the conjecture that web users differ significantly from the user assumed in the standard information retrieval literature. Specifically, we show that web users type in short queries, mostly look at the first 10 results only, and seldom modify the query. This suggests that traditional information retrieval techniques may not work well for answering web search requests. The correlation analysis showed that the most highly correlated items are constituents of phrases. This result indicates it may be useful for search engines to consider search terms as parts of phrases even if the user did not explicitly specify them as such.},
  acmid = {331405},
  address = {New York, NY, USA},
  author = {Silverstein, Craig and Marais, Hannes and Henzinger, Monika and Moricz, Michael},
  doi = {10.1145/331403.331405},
  interhash = {5e26846be504d4fc6b6a7b236c1c023a},
  intrahash = {4ac734beeccbcb3a05786e8ca57f5629},
  issn = {0163-5840},
  issue_date = {Fall 1999},
  journal = {SIGIR Forum},
  month = sep,
  number = 1,
  numpages = {7},
  pages = {6--12},
  publisher = {ACM},
  title = {Analysis of a very large web search engine query log},
  url = {http://doi.acm.org/10.1145/331403.331405},
  volume = 33,
  year = 1999
}

@article{nicholas2005scholarly,
  address = {Bingley},
  author = {Nicholas, David and Huntington, Paul and Watkinson, Anthony},
  interhash = {10580bc4cff2d14ca913b1682e728c9a},
  intrahash = {8e733e3b55a1a648c6e5070d347c43dc},
  journal = {Journal of Documentation},
  number = 2,
  pages = {248--280},
  publisher = {Emerald Group Publishing Limited},
  title = {Scholarly journal usage: the results of deep log analysis},
  url = {http://www.emeraldinsight.com/journals.htm?articleid=1465024&show=abstract},
  volume = 61,
  year = 2005
}

@inproceedings{jones1998usage,
  acmid = {276739},
  address = {New York, NY, USA},
  author = {Jones, Steve and Cunningham, Sally Jo and McNab, Rodger},
  booktitle = {Proceedings of the third ACM conference on Digital libraries},
  doi = {10.1145/276675.276739},
  interhash = {1067f7328bc229ebb11a48be810b8adf},
  intrahash = {b61a4bcdb642dd45d2144a7181121f1f},
  isbn = {0-89791-965-3},
  location = {Pittsburgh, Pennsylvania, USA},
  numpages = {2},
  pages = {293--294},
  publisher = {ACM},
  series = {DL '98},
  title = {Usage analysis of a digital library},
  url = {http://doi.acm.org/10.1145/276675.276739},
  year = 1998
}

@article{Fuchs2010379,
  abstract = {Subspace representations that preserve essential information of high-dimensional data may be advantageous for many reasons such as improved interpretability, overfitting avoidance, acceleration of machine learning techniques. In this article, we describe a new subspace representation of time series which we call polynomial shape space representation. This representation consists of optimal (in a least-squares sense) estimators of trend aspects of a time series such as average, slope, curve, change of curve, etc. The shape space representation of time series allows for a definition of a novel similarity measure for time series which we call shape space distance measure. Depending on the application, time series segmentation techniques can be applied to obtain a piecewise shape space representation of the time series in subsequent segments. In this article, we investigate the properties of the polynomial shape space representation and the shape space distance measure by means of some benchmark time series and discuss possible application scenarios in the field of temporal data mining.},
  author = {Fuchs, Erich and Gruber, Thiemo and Pree, Helmuth and Sick, Bernhard},
  doi = {10.1016/j.neucom.2010.03.022},
  interhash = {88c499ac1dc9e9708e70187967494219},
  intrahash = {fdf6865c1bece3f77cc3e29365a2c6b3},
  issn = {0925-2312},
  journal = {Neurocomputing},
  note = {Artificial Brains},
  number = {1--3},
  pages = {379--393},
  title = {Temporal data mining using shape space representations of time series},
  url = {http://www.sciencedirect.com/science/article/pii/S0925231210002237},
  volume = 74,
  year = 2010
}

% The following entry continues past the end of this span and is left untouched.
@incollection{ABP:11, author = {Atzmueller, Martin and Beer, Stephanie and Puppe, Frank}, booktitle = {Collaboration and the Semantic Web: Social Networks, Knowledge Networks, and Knowledge Resources}, editor = {Brüggemann, Stefan and d’Amato, Claudia}, interhash = {9c0d3f10e985d8654d8a2eae39121ef2}, intrahash = {781410de8780f9033aae08162cbdf073}, pages = {149-167}, publisher = {IGI Global}, 
title = {{Data Mining, Validation and Collaborative Knowledge Capture}}, year = 2012 } @inproceedings{Atzmueller:12b, address = {New York, NY, USA}, author = {Atzmueller, Martin}, booktitle = {Proc. 3rd International Workshop on Modeling Social Media (MSM 2012), Hypertext 2012}, interhash = {59cc9a3c8221ebd802aa85acaba4506f}, intrahash = {52600def4f1f0d36b86ed4fbd15e353a}, publisher = {ACM Press}, title = {{Onto Collective Intelligence in Social Media: Exemplary Applications and Perspectives}}, year = 2012 } @incollection{LA:12, address = {Heidelberg, Germany}, alteditor = {Editor}, author = {Lemmerich, Florian and Atzmueller, Martin}, booktitle = {{Modeling and Mining Ubiquitous Social Media}}, interhash = {ae9dc5bfe7f42f1e9ca59aadda4bfd9e}, intrahash = {8435f029bf0e32340e2cf44d0eeb65e7}, publisher = {Springer Verlag}, series = {LNAI}, title = {{Describing Locations using Tags and Images: Explorative Pattern Mining in Social Media}}, url = {http://www.kde.cs.uni-kassel.de/atzmueller/paper/lemmerich-explorative-pattern-mining-socia-media-lnai-2012.pdf}, volume = 7472, year = 2012 } @proceedings{CAH:12, address = {New York, NY, USA}, editor = {Chin, Alvin and Atzmueller, Martin and Helic, Denis}, interhash = {8c21f6c9eb9d4658ca2374f12a63e950}, intrahash = {338ca2515a0f6cd77996d1bbf54c7f4d}, publisher = {ACM Press}, title = {{Proceedings MSM 2012: Workshop on Modeling Social Media -- Collective Intelligence in Social Media}}, year = 2012 } @inproceedings{AL:12a, address = {Heidelberg, Germany}, author = {Atzmueller, Martin and Lemmerich, Florian}, booktitle = {Proc. ECML/PKDD 2012: European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases. 
accepted}, interhash = {6f00bb468dbc49b2adf426302bcf39f6}, intrahash = {e2aac1cdeabeeb4568e2ef4ffe9464d2}, publisher = {Springer Verlag}, title = {{VIKAMINE - Open-Source Subgroup Discovery, Pattern Mining, and Analytics}}, url = {http://www.kde.cs.uni-kassel.de/atzmueller/paper/atzmueller-vikamine2-ecml-pkdd-2012.pdf}, year = 2012 } @article{Atzmueller:12c, author = {Atzmueller, Martin}, interhash = {0b20c1d53d5df05326d594726273c2fb}, intrahash = {7b616e64994893a2aad95b5ad95db662}, journal = {WIREs: Data Mining and Knowledge Discovery}, title = {{Mining Social Media: Key Players, Sentiments, and Communities}}, volume = {In Press}, year = 2012 } @book{ACHH:12, address = {Heidelberg, Germany}, editor = {Atzmueller, Martin and Chin, Alvin and Helic, Denis and Hotho, Andreas}, interhash = {ebf8e8b66c6c0723092e11e40998d61f}, intrahash = {a0e5d144b39199fa4acb6319f29e7a15}, publisher = {Springer Verlag}, series = {Lecture Notes in Computer Science}, title = {Modeling and Mining Ubiquitous Social Media}, url = {http://www.springer.com/computer/ai/book/978-3-642-33683-6}, volume = 7472, year = 2012 } @inproceedings{SNKA:12, address = {Bristol, UK}, author = {Seipel, Dietmar and Neubeck, Philipp and Köhler, Stefan and Atzmueller, Martin}, booktitle = {Proc. 
ECML/PKDD Workshop on New Frontiers in Mining Complex Patterns}, interhash = {6ddaf3fd7606e39482a97cbce7a01892}, intrahash = {e80737788c869221f272dd9af03acecf}, title = {{Mining Complex Event Patterns in Computer Networks}}, year = 2012 } @book{AH:12, address = {Bristol, UK}, editor = {Atzmueller, Martin and Hotho, Andreas}, interhash = {ad11fdb5a32814dc9c75b4483dccd6a7}, intrahash = {5304ffe0848eccd3938b50c811d2556d}, publisher = {Workshop Notes}, title = {{Proceedings of the Third International Workshop on Mining Ubiquitous and Social Environments (MUSE 2012)}}, url = {http://www.kde.cs.uni-kassel.de/ws/muse2012/proceedings.pdf}, year = 2012 } @incollection{BAESSG:12, abstract = {To facilitate user-centered software engineering, developers need an easy to grasp understanding of the user. The use of personas helps to keep specific user needs in mind during the design process. Technology acceptance is of particular interest for the design of innovative applications previously unknown to potential users. Therefore, our research focuses on defining a typology of relevant user characteristics with respect to technology acceptance and transferring those findings to the description of personas. The presented work focuses on the statistical relationship between technology acceptance and personality. We apply sub-group discovery as a statistical tool. Based on the statistically derived subgroups and patterns we define the mentioned personas to help developers to understand different forms of technology acceptance. 
By integrating the specifically defined personas into existing methods in the field of software engineering the feasibility of the presented approach is demonstrated.}, address = {Heidelberg, Germany}, author = {Behrenbruch, Kay and Atzmueller, Martin and Evers, Christoph and Schmidt, Ludger and Stumme, Gerd and Geihs, Kurt}, booktitle = {Human-Centred Software Engineering}, interhash = {1e609af1021c5acbb5db78444c52a9e9}, intrahash = {847830846b80d4507aa4b93d1c8deb83}, pages = {259--266 }, publisher = {Springer}, series = {LNCS}, title = {{A Personality Based Design Approach Using Subgroup Discovery}}, volume = 7623, year = 2012 } @inproceedings{SAS:12, address = {Boston, MA, USA}, author = {Scholz, Christoph and Atzmueller, Martin and Stumme, Gerd}, booktitle = {Proc. Fourth ASE/IEEE International Conference on Social Computing (SocialCom)}, interhash = {9bc5d42018dbe8b926be214190258b3c}, intrahash = {be5ae4b92170e7c595f5fdcac15b4786}, publisher = {IEEE Computer Society}, title = {{On the Predictability of Human Contacts: Influence Factors and the Strength of Stronger Ties}}, url = {http://www.kde.cs.uni-kassel.de/atzmueller/paper/scholz-on-f2f-predictability-socialcom-2012.pdf}, year = 2012 } @incollection{ADHMS:12, address = {Heidelberg, Germany}, alteditor = {Editor}, author = {Atzmueller, Martin and Doerfel, Stephan and Hotho, Andreas and Mitzlaff, Folke and Stumme, Gerd}, booktitle = {{Modeling and Mining Ubiquitous Social Media}}, interhash = {4f1f4b515b01cc448a91b3e368deabad}, intrahash = {d81d6f6ccdf3ff6572898d39c90e6354}, publisher = {Springer Verlag}, series = {LNAI}, title = {Face-to-Face Contacts at a Conference: Dynamics of Communities and Roles}, url = {http://www.kde.cs.uni-kassel.de/atzmueller/paper/atzmueller-face-to-face-contacts-dynamics-lnai-2012.pdf}, volume = 7472, year = 2012 } @book{AH:12, address = {Bristol, UK}, editor = {Atzmueller, Martin and Hotho, Andreas}, interhash = {ad11fdb5a32814dc9c75b4483dccd6a7}, intrahash = 
{5304ffe0848eccd3938b50c811d2556d}, publisher = {Workshop Notes}, title = {{Proceedings of the Third International Workshop on Mining Ubiquitous and Social Environments (MUSE 2012)}}, url = {http://www.kde.cs.uni-kassel.de/ws/muse2012/proceedings.pdf}, year = 2012 } @book{ACHH:12, address = {Heidelberg, Germany}, editor = {Atzmueller, Martin and Chin, Alvin and Helic, Denis and Hotho, Andreas}, interhash = {ebf8e8b66c6c0723092e11e40998d61f}, intrahash = {a0e5d144b39199fa4acb6319f29e7a15}, publisher = {Springer Verlag}, series = {Lecture Notes in Computer Science}, title = {Modeling and Mining Ubiquitous Social Media}, url = {http://www.springer.com/computer/ai/book/978-3-642-33683-6}, volume = 7472, year = 2012 } @article{liu2012crowdsourcing, abstract = {Some complex problems, such as image tagging and natural language processing, are very challenging for computers, where even state-of-the-art technology is yet able to provide satisfactory accuracy. Therefore, rather than relying solely on developing new and better algorithms to handle such tasks, we look to the crowdsourcing solution -- employing human participation -- to make good the shortfall in current technology. Crowdsourcing is a good supplement to many computer tasks. A complex job may be divided into computer-oriented tasks and human-oriented tasks, which are then assigned to machines and humans respectively.

To leverage the power of crowdsourcing, we design and implement a Crowdsourcing Data Analytics System, CDAS. CDAS is a framework designed to support the deployment of various crowdsourcing applications. The core part of CDAS is a quality-sensitive answering model, which guides the crowdsourcing engine to process and monitor the human tasks. In this paper, we introduce the principles of our quality-sensitive model. To satisfy user required accuracy, the model guides the crowdsourcing query engine for the design and processing of the corresponding crowdsourcing jobs. It provides an estimated accuracy for each generated result based on the human workers' historical performances. When verifying the quality of the result, the model employs an online strategy to reduce waiting time. To show the effectiveness of the model, we implement and deploy two analytics jobs on CDAS, a twitter sentiment analytics job and an image tagging job. We use real Twitter and Flickr data as our queries respectively. We compare our approaches with state-of-the-art classification and image annotation techniques. The results show that the human-assisted methods can indeed achieve a much higher accuracy. 
By embedding the quality-sensitive model into crowdsourcing query engine, we effectively reduce the processing cost while maintaining the required query answer quality.}, acmid = {2336676}, author = {Liu, Xuan and Lu, Meiyu and Ooi, Beng Chin and Shen, Yanyan and Wu, Sai and Zhang, Meihui}, interhash = {41ad6e73b03373d76d3164ba248335d7}, intrahash = {2091967734f96c4afbc09319d48a8c65}, issn = {2150-8097}, issue_date = {June 2012}, journal = {Proceedings of the VLDB Endowment}, month = jun, number = 10, numpages = {12}, pages = {1040--1051}, publisher = {VLDB Endowment}, title = {CDAS: a crowdsourcing data analytics system}, url = {http://dl.acm.org/citation.cfm?id=2336664.2336676}, volume = 5, year = 2012 } @article{rauber2009webarchivierung, abstract = { In den letzten Jahren haben Bibliotheken und Archive zunehmend die Aufgabe übernommen, neben konventionellen Publikationen auch Inhalte aus dem World Wide Web zu sammeln, um so diesen wertvollen Teil unseres kulturellen Erbes zu bewahren und wichtige Informationen langfristig verfügbar zu halten. Diese massiven Datensammlungen bieten faszinierende Möglichkeiten, rasch Zugriff auf wichtige Informationen zu bekommen, die im Live-Web bereits verloren gegangen sind. Sie sind eine unentbehrliche Quelle für Wissenschaftler, die in der Zukunft die gesellschaftliche und technologische Entwicklung unserer Zeit nachvollziehen wollen. Auf der anderen Seite stellt eine derartige Datensammlung aber einen völlig neuen Datenbestand dar, der nicht nur rechtliche, sondern auch zahlreiche ethische Fragen betreffend seine Nutzung aufwirft. Diese werden in dem Ausmaß zunehmen, in dem die technischen Möglichkeiten zur automatischen Analyse und Interpretation dieser Daten leistungsfähiger werden. 
Da sich die meisten Webarchivierungsinitiativen dieser Problematik bewusst sind, bleibt die Nutzung der Daten derzeit meist stark eingeschränkt, oder es wird eine Art von "Opt-Out"-Möglichkeit vorgesehen, wodurch Webseiteninhaber die Aufnahme ihrer Seiten in ein Webarchiv ausschließen können. Mit beiden Ansätzen können Webarchive ihr volles Nutzungspotenzial nicht ausschöpfen. Dieser Artikel beschreibt einleitend kurz die Technologien, die zur Sammlung von Webinhalten zu Archivierungszwecken verwendet werden. Er hinterfragt Annahmen, die die freie Verfügbarkeit der Daten und unterschiedliche Nutzungsarten betreffen. Darauf aufbauend identifiziert er eine Reihe von offenen Fragen, deren Lösung einen breiteren Zugriff und bessere Nutzung von Webarchiven erlauben könnte. }, author = {Rauber, Andreas and Kaiser, Max}, editor = {Knoll, Matthias and Meier, Andreas}, interhash = {3b35b676a2817868d93481aeebfa4154}, intrahash = {cdaef18169a7d8300cf54daf018a74cc}, issn = {1436-3011}, journal = {HMD Praxis der Wirtschaftsinformatik}, month = aug, publisher = {dpunkt.verlag}, title = {Webarchivierung und Web Archive Mining: Notwendigkeit, Probleme und Lösungsansätze}, url = {http://hmd.dpunkt.de/268/03.php}, volume = 268, year = 2009 } @incollection{atzmueller2012facetoface, address = {Heidelberg, Germany}, alteditor = {Editor}, author = {Atzmueller, Martin and Doerfel, Stephan and Hotho, Andreas and Mitzlaff, Folke and Stumme, Gerd}, booktitle = {{Modeling and Mining Ubiquitous Social Media}}, interhash = {4f1f4b515b01cc448a91b3e368deabad}, intrahash = {d81d6f6ccdf3ff6572898d39c90e6354}, publisher = {Springer Verlag}, series = {LNAI}, title = {Face-to-Face Contacts at a Conference: Dynamics of Communities and Roles}, volume = 7472, year = 2012 } @inproceedings{conf/birthday/BloehdornBCGHLMMSSV11, author = {Bloehdorn, Stephan and Blohm, Sebastian and Cimiano, Philipp and Giesbrecht, Eugenie and Hotho, Andreas and Lösch, Uta and Mädche, Alexander and Mönch, Eddie and Sorg, 
Philipp and Staab, Steffen and Völker, Johanna}, booktitle = {Foundations for the Web of Information and Services}, crossref = {conf/birthday/2011studer}, editor = {Fensel, Dieter}, ee = {http://dx.doi.org/10.1007/978-3-642-19797-0_7}, interhash = {db48314326a36fc4ac8770cba2c20e49}, intrahash = {21be5153a8f491c9f209d57ce7662387}, isbn = {978-3-642-19796-3}, pages = {115-142}, publisher = {Springer}, title = {Combining Data-Driven and Semantic Approaches for Text Mining.}, url = {http://dblp.uni-trier.de/db/conf/birthday/studer2011.html#BloehdornBCGHLMMSSV11}, year = 2011 } @inproceedings{conf/birthday/HothoS11, author = {Hotho, Andreas and Stumme, Gerd}, booktitle = {Foundations for the Web of Information and Services}, crossref = {conf/birthday/2011studer}, editor = {Fensel, Dieter}, ee = {http://dx.doi.org/10.1007/978-3-642-19797-0_8}, interhash = {502dc9bea95f0c581a37cd39cae2ff09}, intrahash = {845a2a6bf9a43be9e85741a6c7d2aa2d}, isbn = {978-3-642-19796-3}, pages = {143-153}, publisher = {Springer}, title = {From Semantic Web Mining to Social and Ubiquitous Mining - A Subjective View on Past, Current, and Future Research.}, url = {http://dblp.uni-trier.de/db/conf/birthday/studer2011.html#HothoS11}, year = 2011 } @inproceedings{default, author = {Pak, Alexander and Paroubek, Patrick}, interhash = {ac930b0459a3c8a2fc2d74c52a475026}, intrahash = {ba1358f07702423b60c9e94f8aa5985c}, issue = {10}, pages = {1320-1326}, title = {Twitter as a Corpus for Sentiment Analysis and Opinion Mining}, url = {http://www.mendeley.com/research/twitter-corpus-sentiment-analysis-opinion-mining-18/}, volume = 2010, year = 2010 } @inproceedings{mitzlaff2011community, author = {Mitzlaff, Folke and Atzmueller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd}, booktitle = {Analysis of Social Media and Ubiquitous Data}, interhash = {1ef065a81ed836dfd31fcc4cd4da133b}, intrahash = {0f45e870093c053e6f41f54c14bda46b}, series = {LNAI}, title = {{Community Assessment using Evidence 
Networks}}, volume = 6904, year = 2011 } @proceedings{bjrnelmarmacek2011profile, address = {Boston, MA, USA}, author = {{Björn-Elmar Macek, Martin Atzmueller, Gerd Stumme}}, interhash = {1af59b6893ebc053e42bca152000c0a1}, intrahash = {8ad9a4592710e41d9d7fb9eba0cee79c}, publisher = {IEEE Computer Society}, title = {Profile Mining in CVS-Logs and Face-to-Face Contacts for Recommending Software Developers}, volume = {Proc. IEEE SocialCom}, year = 2011 } @book{AH:11, address = {Athens, Greece}, editor = {Atzmueller, Martin and Hotho, Andreas}, interhash = {8215abfd0550872bdd023c532f21e257}, intrahash = {87b0576f542aa6de352a2bff1428ce6b}, publisher = {ECML/PKDD 2011}, title = {{Proceedings of the 2011 International Workshop on Mining Ubiquitous and Social Environments (MUSE 2011)}}, year = 2011 } @inproceedings{mitzlaff2011community, author = {Mitzlaff, Folke and Atzmueller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd}, booktitle = {Analysis of Social Media and Ubiquitous Data}, interhash = {1ef065a81ed836dfd31fcc4cd4da133b}, intrahash = {0f45e870093c053e6f41f54c14bda46b}, series = {LNAI}, title = {{Community Assessment using Evidence Networks}}, volume = 6904, year = 2011 } @inproceedings{atzmueller2011towards, author = {Atzmueller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd}, booktitle = {Proceedings of the 4th international workshop on Social Data on the Web (SDoW2011)}, editor = {Passant, Alexandre and Fernández, Sergio and Breslin, John and Bojārs, Uldis}, interhash = {65222f0ccc23063a2a15c0a7fd5513a0}, intrahash = {a47a41658592202811f0139d4bb65871}, title = {Towards Mining Semantic Maturity in Social Bookmarking Systems}, url = {http://www.kde.cs.uni-kassel.de/pub/pdf/atzmueller2011towards.pdf}, year = 2011 } @proceedings{berendt2004web, address = {Heidelberg}, editor = {Berendt, Bettina and Hotho, Andreas and Mladenic, Dunja and van Someren, Maarten and Spiliopoulou, Myra and Stumme, Gerd}, interhash = 
{d7c78aa71fa7487dacfd9172467f1912}, intrahash = {be8b91e0c05dde74a4af7123b949c6ac}, isbn = {3-540-23258-3}, note = {http://km.aifb.uni-karlsruhe.de/ws/ewmf03/}, publisher = {Springer}, series = {LNAI}, title = {Web Mining: From Web to Semantic Web, First European Web Mining Forum, EMWF 2003, Cavtat-Dubrovnik, Croatia, September 22, 2003, Revised Selected and Invited Papers}, url = {http://springerlink.metapress.com/content/unvvag26dttf/}, volume = 3209, year = 2004 } @inproceedings{berendt2004roadmap, abstract = {The purpose of Web mining is to develop methods and systems for discovering models of objects and processes on the World Wide Web and for web-based systems that show adaptive performance. Web Mining integrates three parent areas: Data Mining (we use this term here also for the closely related areas of Machine Learning and Knowledge Discovery), Internet technology and World Wide Web, and for the more recent Semantic Web. The World Wide Web has made an enormous amount of information electronically accessible. The use of email, news and markup languages like HTML allow users to publish and read documents at a world-wide scale and to communicate via chat connections, including information in the form of images and voice records. The HTTP protocol that enables access to documents over the network via Web browsers created an immense improvement in communication and access to information. For some years these possibilities were used mostly in the scientific world but recent years have seen an immense growth in popularity, supported by the wide availability of computers and broadband communication. 
The use of the internet for other tasks than finding information and direct communication is increasing, as can be seen from the interest in ldquoe-activitiesrdquo such as e-commerce, e-learning, e-government, e-science.}, address = {Heidelberg}, author = {Berendt, Bettina and Hotho, Andreas and Mladenic, Dunja and van Someren, Maarten and Spiliopoulou, Myra and Stumme, Gerd}, bibsource = {DBLP, http://dblp.uni-trier.de}, booktitle = {Web Mining: From Web to Semantic Web}, editor = {Berendt, Bettina and Hotho, Andreas and Mladenic, Dunja and van Someren, Maarten and Spiliopoulou, Myra and Stumme, Gerd}, ee = {http://springerlink.metapress.com/openurl.asp?genre=article{\&}issn=0302-9743{\&}volume=3209{\&}spage=1}, interhash = {a27cc056d337718c1e10a3f9e3048143}, intrahash = {158a99f0088fcefa45b1eb7f96aa5673}, pages = {1-22}, publisher = {Springer}, title = {A Roadmap for Web Mining: From Web to Semantic Web.}, url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2004/berendt2004roadmap.pdf}, volume = 3209, year = 2004 } @incollection{berendt04usage, abstract = {Semantic Web Mining aims at combining the two fast-developing research areas Semantic Web and Web Mining. Web Mining aims at discovering insights about the meaning of Web resources and their usage. Given the primarily syntactical nature of data Web mining operates on, the discovery of meaning is impossible based on these data only. Therefore, formalizations of the semantics of Web resources and navigation behavior are increasingly being used. This fits exactly with the aims of the Semantic Web: the Semantic Web enriches the WWW by machine-processable information which supports the user in his tasks. 
In this paper, we discuss the interplay of the Semantic Web with Web Mining, with a specific focus on usage mining.}, address = {Boston}, author = {Berendt, Bettina and Hotho, Andreas and Stumme, Gerd}, booktitle = {Data Mining Next Generation Challenges and Future Directions}, editor = {Kargupta, Hillol and Joshi, Anupam and Sivakumar, Krishnamoorthy and Yesha, Yelena}, interhash = {272317659fc9bad03d6083a7afc1bc35}, intrahash = {0ef00fe39718eae61dca4d251b14578d}, isbn = {0-262-61203-8}, pages = {461-481}, publisher = {AAAI Press}, title = {Usage Mining for and on the Semantic Web}, url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2004/berendt04usage.pdf}, year = 2004 } @inproceedings{berendt05semantic, author = {Berendt, Bettina and Hotho, Andreas and Stumme, Gerd}, booktitle = {Proc. of the 1st Intl. Workshop on Representation and Analysis of Web Space}, editor = {Svatek, Vojtech and Snasel, Vaclav}, interhash = {e4b853ff13b63a88b009610890b89348}, intrahash = {f8826ba2790eeb857dd4becb31a08225}, isbn = {80-248-0864-1}, pages = {1-16}, publisher = {Technical University of Ostrava}, title = {Semantic Web Mining and the Representation, Analysis, and Evolution of Web Space}, url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2005/berendt2005semantic.pdf}, year = 2005 } @proceedings{berendt05european, editor = {Berendt, Bettina and Hotho, Andreas and Mladenic, Dunja and Semerano, Giovanni and Spiliopoulou, Myra and Stumme, Gerd and van Someren, Maarten}, interhash = {6dfd547a42a7bd5ccacce75ea8875704}, intrahash = {f306e43da22adede0286917d5d83eb3b}, title = {Proc. 
of the European Web Mining Forum 2005}, url = {http://www.kde.cs.uni-kassel.de/ws/ewmf05}, year = 2005 } @article{pasquier2005generating, author = {Pasquier, Nicolas and Taouil, Rafik and Bastide, Yves and Stumme, Gerd and Lakhal, Lotfi}, comment = {alpha}, interhash = {cb0ee99fae39f2a5e0af5be9d97978f5}, intrahash = {40f59a7fa7ce5015f9ee81709db89de0}, journal = {Journal Intelligent Information Systems (JIIS)}, number = 1, pages = {29-60}, publisher = {Kluwer Academic Publishers}, title = {Generating a Condensed Representation for Association Rules}, url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2005/pasquier2005generating.pdf}, volume = 24, year = 2005 } @article{themenheft2007webmining, author = {Hotho, Andreas and Stumme, Gerd}, interhash = {39f94bf3a1663d9cec6a6cb8354a9bd9}, intrahash = {e9535ec82afa53f44a1b37704aa9a71f}, journal = {Künstliche Intelligenz}, number = 3, pages = {5-8}, title = {Mining the World Wide Web -- Methods, Ap- plications, and Perspectives}, url = {http://www.kuenstliche-intelligenz.de/index.php?id=7758}, year = 2007 } @proceedings{themenheft2007webmining, editor = {Hotho, Andreas and Stumme, Gerd}, interhash = {83c28b86f2ac897e906660e54e6fffc0}, intrahash = {c73311bb72ad480d74125dbc9d94c450}, journal = {Künstliche Intelligenz}, number = 3, pages = {5-8}, title = {Themenheft Web Mining, Künstliche Intelligenz}, url = {http://www.kuenstliche-intelligenz.de/index.php?id=7758}, year = 2007 } @book{Semantic2006Ackermann, address = {Heidelberg}, editor = {Ackermann, Markus and Berendt, Bettina and Grobelnik, Marko and Hotho, Andreas and Mladenic, Dunja and Semeraro, Giovanni and Spiliopoulou, Myra and Stumme, Gerd and Svatek, Vojtech and van Someren, Maarten}, interhash = {f4891d2967571b77f1276298f112158f}, intrahash = {f8335dd42e7eec61d60bbeced71ff3be}, isbn13 = {978-3-540-47697-9}, issn = {0302-9743}, publisher = {Springer}, title = {Semantics, Web and Mining}, url = {http://dx.doi.org/10.1007/11908678}, year = 2006 } 
% Entries below are laid out one field per line. Page and issue ranges use
% "--". Acronyms in case-changed titles are braced.

% This is a chapter in an edited volume, so it is typed as incollection:
% classic styles ignore booktitle and editor on inbook entries. The undefined
% macro in the abstract is replaced by plain text, and the bare DOI is taken
% from the ee field.
@incollection{lakhal2005efficient,
  abstract = {Association rules are a popular knowledge discovery technique for warehouse basket analysis. They indicate which items of the warehouse are frequently bought together. The problem of association rule mining has first been stated in 1993. Five years later, several research groups discovered that this problem has a strong connection to Formal Concept Analysis (FCA). In this survey, we will first introduce some basic ideas of this connection along a specific algorithm, Titanic, and show how FCA helps in reducing the number of resulting rules without loss of information, before giving a general overview over the history and state of the art of applying FCA for association rule mining.},
  address = {Heidelberg},
  author = {Lakhal, Lotfi and Stumme, Gerd},
  booktitle = {Formal Concept Analysis: Foundations and Applications},
  doi = {10.1007/11528784_10},
  editor = {Ganter, Bernhard and Stumme, Gerd and Wille, Rudolf},
  ee = {http://dx.doi.org/10.1007/11528784_10},
  interhash = {f5777a0f9dccfcf4f9968119d77297fc},
  intrahash = {2b350f817428e4c6c7259cd279815091},
  pages = {180--195},
  publisher = {Springer},
  series = {LNAI},
  title = {Efficient Mining of Association Rules Based on Formal Concept Analysis},
  url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2005/lakhal2005efficient.pdf},
  volume = 3626,
  year = 2005
}

@article{WA:10,
  author = {Weiss, Christian and Atzmueller, Martin},
  interhash = {b14b157e6210792724d46c06af7aec5a},
  intrahash = {4edcf7521be16ba8fb819d2e13f8e4d6},
  journal = {Quality and Reliability Engineering},
  title = {{EWMA} Control Charts for Monitoring Binary Processes with Applications to Medical Diagnosis Data},
  year = 2010
}

@inproceedings{LA:10,
  author = {Lemmerich, Florian and Atzmueller, Martin},
  booktitle = {Proc. 23rd FLAIRS Conference},
  interhash = {60aec4cb1288ac3b10cac5941a24bd90},
  intrahash = {15793feea44048a02344592d85f797a2},
  title = {Fast Discovery of Relevant Subgroup Patterns},
  year = 2010
}

@inproceedings{AB:10,
  author = {Atzmueller, Martin and Beer, Stephanie},
  booktitle = {Proc. 55th IWK, International Workshop on Design, Evaluation and Refinement of Intelligent Systems (DERIS)},
  interhash = {7e80a6b45a723165b02d8e33581da64e},
  intrahash = {a735dbe20e7e04c577c3eb4e67ebede2},
  publisher = {University of Ilmenau},
  title = {Validation of Mixed-Structured Data Using Pattern Mining and Information Extraction},
  year = 2010
}

@inproceedings{KLA:10,
  author = {Kaempgen, Benedikt and Lemmerich, Florian and Atzmueller, Martin},
  booktitle = {Proc. 55th IWK, International Workshop on Design, Evaluation and Refinement of Intelligent Systems (DERIS)},
  interhash = {3bddbc334c820858b48ae85c13453aea},
  intrahash = {f5965d05c7d74d2db0ee3da004d9f2ea},
  title = {Decision-Maker-Aware Design of Descriptive Data Mining},
  year = 2010
}

@inproceedings{AR:10a,
  author = {Atzmueller, Martin and Roth-Berghofer, Thomas},
  booktitle = {Proc. 30th SGAI International Conference on Artificial Intelligence (AI-2010)},
  interhash = {cd60e1435e394de7edde93e76c04795f},
  intrahash = {18d53f93993aff7d941de9df7089bd4b},
  title = {The Mining and Analysis Continuum of Explaining Uncovered},
  year = 2010
}

@inproceedings{AR:10b,
  author = {Atzmueller, Martin and Roth-Berghofer, Thomas},
  booktitle = {Proc. Workshop on Explanation-aware Computing ExaCt 2010 @ ECAI 2010},
  interhash = {7dab16aeb2f5567ca898435afde31a4f},
  intrahash = {d958a4e9a180a47b82c279a727ac53a5},
  title = {Towards Explanation-Aware Social Software: Applying the Mining and Analysis Continuum of Explaining},
  year = 2010
}

@techreport{AR:10c,
  author = {Atzmueller, Martin and Roth-Berghofer, Thomas},
  institution = {Deutsches Forschungszentrum f{\"u}r K{\"u}nstliche Intelligenz},
  interhash = {d3123967ce044a28cbd287098022b609},
  intrahash = {8d48df8503de54b5ce138a85a1db60bd},
  number = {RR-10-02},
  title = {Ready for the {MACE}? The Mining and Analysis Continuum of Explaining Uncovered},
  year = 2010
}

@book{ABHS:10,
  editor = {Atzmueller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  interhash = {9815398a19b44982b6e1b406d1eea00c},
  intrahash = {060c675871a5e2173af200bd12f6f3ff},
  publisher = {Department of Electrical Engineering/Computer Science, Kassel University},
  series = {Technical report (KIS), 2010-10},
  title = {{Proceedings of the LWA 2010 - Lernen, Wissen, Adaptivit{\"a}t}},
  year = 2010
}

% The doi field holds the bare identifier, without a "DOI:" prefix.
@article{berendt2010bridging,
  author = {Berendt, Bettina and Hotho, Andreas and Stumme, Gerd},
  doi = {10.1016/j.websem.2010.04.008},
  interhash = {4969eb2b7bf1fabe60c5f23ab6383d77},
  intrahash = {f8d7bc2af5753906dc3897196daac18c},
  issn = {1570-8268},
  journal = {Web Semantics: Science, Services and Agents on the World Wide Web},
  note = {Bridging the Gap--Data Mining and Social Network Analysis for Integrating Semantic Web and Web 2.0; The Future of Knowledge Dissemination: The Elsevier Grand Challenge for the Life Sciences},
  number = {2--3},
  pages = {95--96},
  title = {Bridging the Gap--Data Mining and Social Network Analysis for Integrating Semantic Web and Web 2.0},
  url = {http://www.sciencedirect.com/science/article/B758F-4YXK4HW-1/2/4cb514565477c54160b5e6eb716c32d7},
  volume = 8,
  year = 2010
}

@incollection{Atzmueller:11,
  author = {Atzmueller, Martin},
  booktitle = {Applied Natural Language Processing and Content Analysis: Advances in Identification, Investigation and Resolution},
  editor = {McCarthy, Philip M. and Boonthum, Chutima},
  interhash = {b7d700872f84e545b714b9cc59b0c188},
  intrahash = {c930dbfed60e5a6d20e8785181f42feb},
  publisher = {IGI Global},
  title = {Data Mining},
  year = 2011
}

@book{AH:10,
  address = {Barcelona, Spain},
  editor = {Atzmueller, Martin and Hotho, Andreas},
  interhash = {7260d64afa0eadf785be450646aec5c4},
  intrahash = {684691da3230424a4b6aef804cd27579},
  publisher = {ECML/PKDD 2010},
  title = {{Proceedings of the 2010 Workshop on Mining Ubiquitous and Social Environments (MUSE 2010)}},
  year = 2010
}

@book{AKBFR:10,
  editor = {Atzmueller, Martin and Knauf, Rainer and Bode, Stephan and Farooq, Qurat-Ul-Ann and Riebisch, Matthias},
  interhash = {0149fc5ac19abfbaf9c1af490acf04fc},
  intrahash = {864ca9616f4b078125961d4289755ef3},
  publisher = {RWTH Aachen University},
  series = {CEUR-WS},
  title = {The IWK2010 Workshops: DERIS2010 and EMDT2010. Proceedings of the IWK2010 Workshops: International Workshop on Design, Evaluation and Refinement of Intelligent Systems (DERIS2010) and the First International Workshop on Evolution Support for Model-Based Development and Testing (EMDT2010)},
  volume = 646,
  year = 2010
}

% The publisher field previously also held the city and the award remark.
% These are split into address and note.
@inproceedings{atzmueller2010towards,
  address = {Leiden, The Netherlands},
  author = {Atzmueller, Martin and Mitzlaff, Folke},
  booktitle = {Workshop on Mining Patterns and Subgroups},
  interhash = {3669737df3d351bf27636b9dc07c13ee},
  intrahash = {2f61938116e8b239fbff58520ecc284c},
  note = {Awarded with the Best Discovery Award},
  publisher = {Lorentz Center},
  title = {{Towards Mining Descriptive Community Patterns}},
  year = 2010
}

% A second definition of key ABP:11 stood here. It was removed because the key
% is already defined earlier in this file, and BibTeX keeps only the first
% definition of a key ("Repeated entry").

@inproceedings{DBLP:conf/pkdd/ADHSS11,
  author = {Scholz, Christoph and Doerfel, Stephan and Atzmueller, Martin and Hotho, Andreas and Stumme, Gerd},
  interhash = {d81c55cdcdf8ee331595bbb4d6fd51d6},
  intrahash = {c1614b434eb13f0f42884ccffae8141d},
  title = {Resource-Aware On-Line {RFID} Localization Using Proximity Data},
  year = 2011
}

% "to appear" is a publication status, so it is stored in note, not address.
@inproceedings{ADHMS:11,
  author = {Atzmueller, Martin and Doerfel, Stephan and Hotho, Andreas and Mitzlaff, Folke and Stumme, Gerd},
  booktitle = {Proc. Workshop on Mining Ubiquitous and Social Environments (MUSE 2011) at ECML/PKDD 2011},
  interhash = {49e97def917e352ca21ab2e3eb7bd88a},
  intrahash = {1fe037ea2712b205c564243d67840059},
  note = {to appear},
  title = {{Face-to-Face Contacts during a Conference: Communities, Roles, and Key Players}},
  year = 2011
}

@inproceedings{atzmueller2011vikamine,
  author = {Atzmueller, Martin and Lemmerich, Florian},
  booktitle = {Proceedings of the LWA 2011 - Learning, Knowledge, Adaptation},
  interhash = {79b076947a2b7c00e234adf840a7636c},
  intrahash = {56521f54efa8a332927a512bafcd8042},
  title = {{VIKAMINE} - A Rich-Client Environment for Intelligent Pattern Mining and Subgroup Discovery},
  year = 2011
}

% Repeated definitions of mitzlaff2011community, atzmueller2011towards and
% bjrnelmarmacek2011profile stood here. They were removed because each key is
% already defined earlier in this file and only that first definition is used.

@inproceedings{LA:11MSM, author = {Lemmerich, 
Florian and Atzmueller, Martin}, booktitle = {Proc. IEEE SocialCom 2011, Workshop on Modeling Social Media (MSM 2011)}, interhash = {c2b76c00f7f484d6f46b361a9f0336cf}, intrahash = {222ccbe9b3a900091c4148e2a93fff81}, publisher = {IEEE Computer Society}, title = {Modeling Location-Based Profiles of Social Image Media using Explorative Pattern Mining}, year = 2011 } @book{AH:11, address = {Athens, Greece}, editor = {Atzmueller, Martin and Hotho, Andreas}, interhash = {8215abfd0550872bdd023c532f21e257}, intrahash = {87b0576f542aa6de352a2bff1428ce6b}, publisher = {ECML/PKDD 2011}, title = {{Proceedings of the 2011 International Workshop on Mining Ubiquitous and Social Environments (MUSE 2011)}}, year = 2011 } @inproceedings{Atzmueller:11b, address = {Glasgow, Scotland, UK}, author = {Atzmueller, Martin}, booktitle = {Proc. 20th Intl. Conference on Information and Knowledge Management (CIKM 2011)}, interhash = {bb01236a8a7dd36a09c79a2fa81c7b96}, intrahash = {00063fadb503f4efbdfabf7f4c2f269f}, publisher = {ACM Press}, title = {Analysis of Communities in Social Media}, year = 2011 } @article{an2004characterizing, acmid = {1031388}, address = {New York, NY, USA}, author = {An, Yuan and Janssen, Jeannette and Milios, Evangelos E.}, doi = {http://dx.doi.org/10.1007/s10115-003-0128-3}, interhash = {73fdd0592c1641d05da5d2323d9f59ae}, intrahash = {60e0c625f5765a05c588c6765a8cd93c}, issn = {0219-1377}, issue = {6}, journal = {Knowl. Inf. Syst.}, month = {November}, numpages = {15}, pages = {664--678}, publisher = {Springer-Verlag New York, Inc.}, title = {Characterizing and Mining the Citation Graph of the Computer Science Literature}, url = {http://dx.doi.org/10.1007/s10115-003-0128-3}, volume = 6, year = 2004 } @inproceedings{conf/icdm/YassineH10, author = {Yassine, Mohamed and Hajj, Hazem}, booktitle = {ICDM Workshops}, crossref = {conf/icdm/2010w}, editor = {Fan, Wei and Hsu, Wynne and Webb, Geoffrey I. 
and Liu, Bing and Zhang, Chengqi and Gunopulos, Dimitrios and Wu, Xindong}, ee = {http://dx.doi.org/10.1109/ICDMW.2010.75}, interhash = {72ae8c258d6559e4a90370453ecc2acc}, intrahash = {8b0afeee143cec94f3058c214ae38c6f}, pages = {1136-1142}, publisher = {IEEE Computer Society}, title = {A Framework for Emotion Mining from Text in Online Social Networks.}, url = {http://dblp.uni-trier.de/db/conf/icdm/icdmw2010.html#YassineH10}, year = 2010 } @inproceedings{atzmueller2011facetoface, author = {Atzmueller, Martin and Doerfel, Stephan and Hotho, Andreas and Mitzlaff, Folke and Stumme, Gerd}, booktitle = {Proc. Workshop on Mining Ubiquitous and Social Environments (MUSE 2011) at ECML/PKDD 2011}, interhash = {49e97def917e352ca21ab2e3eb7bd88a}, intrahash = {1fe037ea2712b205c564243d67840059}, title = {Face-to-Face Contacts during a Conference: Communities, Roles, and Key Players}, year = 2011 } @misc{Rubin2011, abstract = { Machine learning approaches to multi-label document classification have (to date) largely relied on discriminative modeling techniques such as support vector machines. A drawback of these approaches is that performance rapidly drops off as the total number of labels and the number of labels per document increase. This problem is amplified when the label frequencies exhibit the type of highly skewed distributions that are often observed in real-world datasets. In this paper we investigate a class of generative statistical topic models for multi-label documents that associate individual word tokens with different labels. We investigate the advantages of this approach relative to discriminative models, particularly with respect to classification problems involving large numbers of relatively rare labels. We compare the performance of generative and discriminative approaches on document labeling tasks ranging from datasets with several thousand labels to datasets with tens of labels. 
The experimental results indicate that generative models can achieve competitive multi-label classification performance compared to discriminative methods, and have advantages for datasets with many labels and skewed label frequencies. }, author = {Rubin, Timothy N. and Chambers, America and Smyth, Padhraic and Steyvers, Mark}, interhash = {e09d5d8587756d460a5d834025e75aac}, intrahash = {f8a5a3958ae264d19c7f5415eb7f0bce}, note = {cite arxiv:1107.2462}, title = {Statistical Topic Models for Multi-Label Document Classification}, url = {http://arxiv.org/abs/1107.2462}, year = 2011 } @book{srivastava2009mining, abstract = {Giving a broad perspective of the field from numerous vantage points, 'Text Mining' focuses on statistical methods for text mining and analysis. It examines methods to automatically cluster and classify text documents and applies these methods in a variety of areas.}, address = {Boca Raton, FL}, author = {Srivastava, Asho and Sahami, Mehran.}, interhash = {290eabe518274b6fbcc73a106a7d52a6}, intrahash = {45ab79501c114299142864becfa6c841}, isbn = {9781420059403 1420059408}, pages = {--}, publisher = {CRC Press}, refid = {144226505}, title = {Text mining : classification, clustering, and applications}, url = {http://www.worldcat.org/search?qt=worldcat_org_all&q=9781420059403}, year = 2009 } @inproceedings{mitzlaff2011community, author = {Mitzlaff, Folke and Atzmueller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd}, booktitle = {Analysis of Social Media and Ubiquitous Data}, interhash = {1ef065a81ed836dfd31fcc4cd4da133b}, intrahash = {0f45e870093c053e6f41f54c14bda46b}, series = {LNAI}, title = {{Community Assessment using Evidence Networks}}, volume = 6904, year = 2011 }