% Reference database: information retrieval, natural language processing,
% data mining, and e-mail spam filtering literature.
%
% Conventions used below:
%   - One field per line; names in "Last, First" form separated by " and ".
%   - Non-ASCII letters are written as LaTeX escapes ({\"u}) so the file also
%     works with classic 8-bit BibTeX.
%   - Page ranges use "--"; only words that must keep their case are braced.
%   - interhash / intrahash / ee / cdrom / cite / url1 are non-standard fields
%     (they look like BibSonomy / DBLP export identifiers -- TODO confirm);
%     they are ignored by standard styles and are kept verbatim.
%   - Citation keys are unchanged so existing \cite commands keep working.

@book{noauthororeditoryahoo,
  author    = {Tang, Lei and Liu, Huan},
  title     = {Community Detection and Mining in Social Media},
  publisher = {Morgan \& Claypool Publishers},
  year      = 2010,
  doi       = {10.2200/S00298ED1V01Y201009DMK003},
  url       = {http://www.morganclaypool.com/doi/abs/10.2200/S00298ED1V01Y201009DMK003},
  abstract  = {The past decade has witnessed the emergence of participatory Web and social media, bringing people together in many creative ways. Millions of users are playing, tagging, working, and socializing online, demonstrating new forms of collaboration, communication, and intelligence that were hardly imaginable just a short time ago. Social media also helps reshape business models, sway opinions and emotions, and opens up numerous possibilities to study human interaction and collective behavior in an unparalleled scale. This lecture, from a data mining perspective, introduces characteristics of social media, reviews representative tasks of computing with social media, and illustrates associated challenges. It introduces basic concepts, presents state-of-the-art algorithms with easy-to-understand examples, and recommends effective evaluation methods. In particular, we discuss graph-based community detection techniques and many important extensions that handle dynamic, heterogeneous networks in social media. We also demonstrate how discovered patterns of communities can be used for social media mining. The concepts, algorithms, and methods presented in this lecture can help harness the power of social media and support building socially-intelligent systems. This book is an accessible introduction to the study of \emph{community detection and mining in social media}. It is an essential reading for students, researchers, and practitioners in disciplines and applications where social media is a key source of data that piques our curiosity to understand, manage, innovate, and excel. This book is supported by additional materials, including lecture slides, the complete set of figures, key references, some toy data sets used in the book, and the source code of representative algorithms. The readers are encouraged to visit the book website for the latest information.
Table of Contents: Social Media and Social Computing / Nodes, Ties, and Influence / Community Detection and Evaluation / Communities in Heterogeneous Networks / Social Media Mining},
  interhash = {717f8b976eec1dc934a3b84675456f25},
  intrahash = {c4e1fa6bf2d52a237e5557640d87c970},
}

@book{rijsbergen79information,
  author    = {van Rijsbergen, C. J.},
  title     = {Information Retrieval},
  edition   = {Second},
  publisher = {Butterworths},
  address   = {London},
  year      = 1979,
  url       = {http://www.dcs.gla.ac.uk/Keith/Preface.html},
  interhash = {0edccdac9af024f458911b82f61686ab},
  intrahash = {b53893655b48140d4310a848dbf204d3},
}

@book{books/mk/WittenMB99,
  author    = {Witten, Ian H. and Moffat, Alistair and Bell, Timothy C.},
  title     = {Managing Gigabytes: Compressing and Indexing Documents and Images},
  edition   = {Second},
  publisher = {Morgan Kaufmann},
  year      = 1999,
  isbn      = {1-55860-570-3},
  url       = {http://www.cs.mu.oz.au/mg/},
  interhash = {e27de62d31e9665025dc788cb30027d5},
  intrahash = {99a44801fa131f7fda77e769791f5f78},
}

@book{books/aw/Baeza-YatesR99,
  author    = {Baeza-Yates, Ricardo A. and Ribeiro-Neto, Berthier A.},
  title     = {Modern Information Retrieval},
  publisher = {ACM Press / Addison-Wesley},
  year      = 1999,
  isbn      = {0-201-39829-X},
  url       = {http://www.ischool.berkeley.edu/~hearst/irbook/glossary.html},
  interhash = {6f78177742b3c836218aacfc7fc4c43c},
  intrahash = {16ab70975f635f8d72de82e2ef3ef9de},
}

@book{ferber2003information,
  author    = {Ferber, Reginald},
  title     = {Information {Retrieval}: {Suchmodelle} und {Data-Mining-Verfahren} f{\"u}r {Textsammlungen} und das {Web}},
  publisher = {dpunkt Verlag},
  address   = {Heidelberg},
  year      = 2003,
  url       = {http://information-retrieval.de/},
  interhash = {52c1b4ab3e818efef6635eb76b778608},
  intrahash = {b60dbc902a2e19877aec154fa5747751},
}

@book{manning2008,
  author    = {Manning, Christopher D. and Raghavan, Prabhakar and Sch{\"u}tze, Hinrich},
  title     = {Introduction to Information Retrieval},
  publisher = {Cambridge University Press},
  year      = 2008,
  url       = {http://www-csli.stanford.edu/~hinrich/information-retrieval-book.html},
  interhash = {2e574e46b7668a7268e7f02b46f4d9bb},
  intrahash = {2588419fae77ef64bd735f4265f7daa5},
}

@article{charniak97statistical,
  author    = {Charniak, Eugene},
  title     = {Statistical Techniques for Natural Language Parsing},
  journal   = {AI Magazine},
  volume    = 18,
  number    = 4,
  pages     = {33--44},
  year      = 1997,
  url       = {http://citeseer.ist.psu.edu/article/charniak97statistical.html},
  interhash = {9f0c334b655cfa509f9862a0569cd375},
  intrahash = {1d02e8f9d663f5cd8203ec6685a958ed},
}

@book{carstensen04coling,
  editor    = {Carstensen, K.-U. and Eber, Ch. and Endriss, C. and Jekat, S. and Klabunde, R. and Langer, H.},
  title     = {{Computerlinguistik} und {Sprachtechnologie}. {Eine} {Einf{\"u}hrung}},
  publisher = {Spektrum Akademischer Verlag},
  address   = {Heidelberg},
  year      = 2004,
  url       = {http://www.worldcat.org/wcpa/oclc/47812608?page=frame&url=http%3A%2F%2Fopac.bibliothek.uni-kassel.de%2FDB%3D23%2FSET%3D1%2FTTL%3D1%2FCMD%3FACT%3DSRCHA%3FIKT%3D7%26SRT%3DYOP%26TRM%3D3827410274&title=Univ+Gesamthochschule+Kassel&linktype=opac&detail=DEUBK%3AUniv+Gesamthochschule+Kassel%3AAcademic},
  interhash = {4fdc8473c8de1fd4b8f83a39ef6e3d7a},
  intrahash = {13ee9bbbbaf6714679a51bd625aa42ef},
}

@inproceedings{conf/www/SarwarKKR01,
  author    = {Sarwar, Badrul M. and Karypis, George and Konstan, Joseph A. and Riedl, John},
  title     = {Item-Based Collaborative Filtering Recommendation Algorithms},
  booktitle = {WWW},
  pages     = {285--295},
  year      = 2001,
  doi       = {10.1145/371920.372071},
  url       = {http://www10.org/cdrom/papers/pdf/p519.pdf},
  ee        = {http://doi.acm.org/10.1145/371920.372071},
  interhash = {043d1aaba0f0b8c01d84edd517abedaf},
  intrahash = {f349b429624935212ebeed613b89794f},
}

@book{manning99foundations,
  author    = {Manning, Christopher D. and Sch{\"u}tze, Hinrich},
  title     = {Foundations of Statistical Natural Language Processing},
  publisher = {The {MIT} Press},
  address   = {Cambridge, Massachusetts},
  year      = 1999,
  url       = {http://nlp.stanford.edu/fsnlp/},
  interhash = {a81df02f92f266a51183fe936f588a08},
  intrahash = {2fad675aa6ae88082af2507c16d54343},
}

@inproceedings{conf/sigmod/NgLHP98,
  author    = {Ng, Raymond T. and Lakshmanan, Laks V. S. and Han, Jiawei and Pang, Alex},
  title     = {Exploratory Mining and Pruning Optimizations of Constrained Association Rules},
  booktitle = {SIGMOD Conference},
  pages     = {13--24},
  year      = 1998,
  url       = {http://dblp.uni-trier.de/db/conf/sigmod/sigmod98.html#NgLHP98},
  ee        = {db/conf/sigmod/NgLHP98.html},
  cdrom     = {SIGMOD98/P013.PDF},
  cite      = {conf/sigmod/AgrawalIS93},
  interhash = {c4e73bae8e22a39d15d022631c69ddbf},
  intrahash = {72825e6a12b3285349fb64c1020383c0},
}

@inproceedings{672836,
  author    = {Agrawal, Rakesh and Srikant, Ramakrishnan},
  title     = {Fast Algorithms for Mining Association Rules in Large Databases},
  booktitle = {VLDB '94: Proceedings of the 20th International Conference on Very Large Data Bases},
  pages     = {487--499},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  year      = 1994,
  isbn      = {1-55860-153-8},
  interhash = {960c924ccbe1ff429a30f7433ec53122},
  intrahash = {cce11d670329a38a90f625b8005dfb8d},
}

@inproceedings{park1995ehb,
  author    = {Park, Jong Soo and Chen, Ming-Syan and Yu, Philip S.},
  title     = {An Effective Hash-Based Algorithm for Mining Association Rules},
  booktitle = {Proceedings of the 1995 {ACM} {SIGMOD} International Conference on Management of Data},
  pages     = {175--186},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  year      = 1995,
  interhash = {e7a28762e92ab579ed3f99c565848f9a},
  intrahash = {094af08c931c876e20fd0e1e5086583b},
}

@book{MacKay2003,
  author    = {MacKay, David J. C.},
  title     = {Information Theory, Inference, and Learning Algorithms},
  publisher = {Cambridge University Press},
  year      = 2003,
  isbn      = {9780521642989},
  url       = {http://www.inference.phy.cam.ac.uk/mackay/itila/},
  interhash = {86f621d9d6f9f159448f768d792d4511},
  intrahash = {9a8c0bf8a22e0e4583032eaab241ea04},
}

@article{dempster77,
  author    = {Dempster, Arthur P. and Laird, Nan M. and Rubin, Donald B.},
  title     = {Maximum Likelihood from Incomplete Data via the {EM} Algorithm},
  journal   = {Journal of the Royal Statistical Society, Series B},
  volume    = 39,
  number    = 1,
  pages     = {1--38},
  year      = 1977,
  interhash = {6a3c3e7e36b05f7855a57eab65f93593},
  intrahash = {193b7978b7867de7834055bbfb937cba},
}

@techreport{berkhin02survey,
  author      = {Berkhin, Pavel},
  title       = {Survey of Clustering Data Mining Techniques},
  institution = {Accrue Software},
  address     = {San Jose, CA},
  year        = 2002,
  url         = {http://citeseer.ist.psu.edu/berkhin02survey.html},
  interhash   = {fb9b2c7ce3e0f4e6b579660c40fb67a2},
  intrahash   = {2fec60df240f69dbf677e34825d20491},
}

@inproceedings{goodword2006lowd,
  author    = {Lowd, Daniel and Meek, Christopher},
  title     = {Good Word Attacks on Statistical Spam Filters},
  booktitle = {Second Conference on Email and Anti-Spam (CEAS)},
  address   = {Palo Alto, CA},
  year      = 2005,
  url       = {http://www.cs.washington.edu/homes/lowd/ceas05lowd.pdf},
  url1      = {http://www.cs.washington.edu/homes/lowd/ceas05lowd.ppt},
  interhash = {c86d81bb31ea199c1d7aaf8b5e3e280d},
  intrahash = {947e546ff2a77a7f099da4955fa73df2},
}

@techreport{boykin04:_person_email_networ,
  author        = {Boykin, P. Oscar and Roychowdhury, Vwani},
  title         = {Personal Email Networks: An Effective Anti-Spam Tool},
  institution   = {University of California, Los Angeles},
  month         = feb,
  year          = 2004,
  eprint        = {cond-mat/0402143},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/cond-mat/0402143},
  interhash     = {3fe3b3135786db00897541e4dfdd3523},
  intrahash     = {770fc07ad9949bbafa5c3b08f196907b},
}

@book{books/mit/FayyadPSU96,
  editor    = {Fayyad, Usama M. and Piatetsky-Shapiro, Gregory and Smyth, Padhraic and Uthurusamy, Ramasamy},
  title     = {Advances in Knowledge Discovery and Data Mining},
  publisher = {AAAI/MIT Press},
  year      = 1996,
  isbn      = {0-262-56097-6},
  url       = {http://www.amazon.com/gp/product/0262560976},
  interhash = {c11811ccd720de5dad0ffea4741725f0},
  intrahash = {3553c3acc971c03813352c40afe7476a},
}

@incollection{books/mit/fayyadPSU96/BrachmanA96,
  author    = {Brachman, Ronald J. and Anand, Tej},
  title     = {The Process of Knowledge Discovery in Databases},
  booktitle = {Advances in Knowledge Discovery and Data Mining},
  editor    = {Fayyad, Usama M. and Piatetsky-Shapiro, Gregory and Smyth, Padhraic and Uthurusamy, Ramasamy},
  publisher = {AAAI/MIT Press},
  pages     = {37--57},
  year      = 1996,
  isbn      = {0-262-02313-X},
  url       = {http://dblp.uni-trier.de/db/books/collections/fayyad96.html#BrachmanA96},
  interhash = {bcc629281c9cdc0df16afa63be4144d4},
  intrahash = {5853cd96c1ed7a92d58df9d3b63d2d97},
}