% Bibliography: community detection, link analysis and topic/trend mining.
%
% Conventions used in this file:
%   * one field per line, brace-delimited values, page ranges written with "--";
%   * month given with the standard three-letter macros (jan ... dec), unquoted;
%   * doi holds the bare DOI; the original resolver url is kept alongside it;
%   * interhash / intrahash are carried over verbatim from the export this file
%     came from (presumably BibSonomy hashes -- confirm before relying on them);
%     they are ignored by standard bibliography styles.

@phdthesis{trier05visualization,
  author    = {Trier, Matthias},
  title     = {{IT-supported} Visualization and Evaluation of Virtual Knowledge Communities. Applying Social Network Intelligence Software in Knowledge Management to enable knowledge oriented People Network Management},
  school    = {Technische Universit{\"a}t Berlin},
  year      = {2005},
  url       = {http://nbn-resolving.de/urn/resolver.pl?urn=urn:nbn:de:kobv:83-opus-10720},
  interhash = {f36769dd1fffe61d9239e4b4b7dc40e9},
  intrahash = {66eb70a04e6946077182446170dd6dcf}
}

@inproceedings{conf/sdm/AggarwalY05,
  author    = {Aggarwal, Charu C. and Yu, Philip S.},
  title     = {Online Analysis of Community Evolution in Data Streams},
  booktitle = {SDM},
  year      = {2005},
  url       = {http://web.mit.edu/charu/www/aggar142.pdf},
  interhash = {e1487d660a1614b50bd756f7383b98ea},
  intrahash = {bb72c8baa786e98565c4a7448ecae59a}
}

@inproceedings{almeida03design,
  author    = {Almeida, Rodrigo B. and Almeida, Virgilio A. F.},
  title     = {Design and evaluation of a user-based community discovery technique},
  booktitle = {Proceedings of the 4th International Conference on Internet Computing},
  pages     = {17--23},
  year      = {2003},
  url       = {http://citeseer.ist.psu.edu/almeida03design.html},
  interhash = {c882373d278260ba31ae4142e4f6e664},
  intrahash = {41d2e7ad7417153fa5cb257486468919}
}

@inproceedings{988728,
  author    = {Almeida, Rodrigo B. and Almeida, Virgilio A. F.},
  title     = {A community-aware search engine},
  booktitle = {Proceedings of the 13th international conference on World Wide Web},
  pages     = {413--421},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  year      = {2004},
  isbn      = {1-58113-844-X},
  doi       = {10.1145/988672.988728},
  url       = {http://doi.acm.org/10.1145/988672.988728},
  abstract  = {Current search technologies work in a ``one size fits all'' fashion. Therefore, the answer to a query is independent of specific user information need. In this paper we describe a novel ranking technique for personalized search services that combines content-based and community-based evidences. The community-based information is used in order to provide context for queries and is influenced by the current interaction of the user with the service. Our algorithm is evaluated using data derived from an actual service available on the Web --- an online bookstore. We show that the quality of content-based ranking strategies can be improved by the use of community information as another evidential source of relevance. In our experiments the improvements reach up to 48\% in terms of average precision.},
  interhash = {6688127f8ee06240c03f506622947f46},
  intrahash = {33b448de19ddef891f2a4284b1cc42f1}
}

@inproceedings{conf/www/BorodinRRT01,
  author    = {Borodin, Allan and Roberts, Gareth O. and Rosenthal, Jeffrey S. and Tsaparas, Panayiotis},
  title     = {Finding authorities and hubs from link structures on the {World Wide Web}},
  booktitle = {Proceedings of the 10th international conference on World Wide Web},
  pages     = {415--429},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  year      = {2001},
  doi       = {10.1145/371920.372096},
  url       = {http://doi.acm.org/10.1145/371920.372096},
  interhash = {08872cf4fd099592e76a10afcbb141be},
  intrahash = {e8e14fc145cca87570da3f1209711183}
}

@inproceedings{kubica-stochastic,
  author       = {Kubica, Jeremy and Moore, Andrew and Schneider, Jeff and Yang, Yiming},
  title        = {Stochastic Link and Group Detection},
  booktitle    = {Proceedings of the Eighteenth National Conference on Artificial Intelligence},
  pages        = {798--804},
  publisher    = {AAAI Press/MIT Press},
  year         = {2002},
  month        = jul,
  howpublished = {Conference Proceedings},
  interhash    = {7a471a32a59e73c43dc0dd64d55176d2},
  intrahash    = {1086034a16434bc39ad42264980df581}
}

@techreport{Kubica_2003_4489,
  author      = {Kubica, Jeremy Martin and Moore, Andrew and Schneider, Jeff},
  title       = {K-groups: Tractable Group Detection on Large Link Data Sets},
  institution = {Robotics Institute, Carnegie Mellon University},
  number      = {CMU-RI-TR-03-32},
  address     = {Pittsburgh, PA},
  year        = {2003},
  month       = sep,
  url         = {http://www.ri.cmu.edu/pubs/pub_4489.html},
  abstract    = {Discovering underlying structure from co-occurrence data is an important task in many fields, including: insurance, intelligence, criminal investigation, epidemiology, human resources, and marketing. For example a store may wish to identify underlying sets of items purchased together or a human resources department may wish to identify groups of employees that collaborate with each other. Previously Kubica et al. presented the group detection algorithm (GDA) - an algorithm for finding underlying groupings of entities from co-occurrence data. This algorithm is based on a probabilistic generative model and produces coherent groups that are consistent with prior knowledge. Unfortunately, the optimization used in GDA is slow, making it potentially infeasible for many real world data sets. To this end, we present k-groups - an algorithm that uses an approach similar to that of k-means (hard clustering and localized updates) to significantly accelerate the discovery of the underlying groups while retaining GDA's probabilistic model. In addition, we show that k-groups is guaranteed to converge to a local minimum. We also compare the performance of GDA and k-groups on several real world and artificial data sets, showing that k-groups' sacrifice in solution quality is significantly offset by its increase in speed. This trade-off makes group detection tractable on significantly larger data sets.},
  interhash   = {cecbc69533ab6d63fd478c7a9c7651a1},
  intrahash   = {3a4df0e814c3a1b125e3d403abe48733}
}

@inproceedings{kubicaKgroups,
  author    = {Kubica, Jeremy and Moore, Andrew and Schneider, Jeff},
  title     = {Tractable Group Detection on Large Link Data Sets},
  booktitle = {The Third IEEE International Conference on Data Mining},
  editor    = {Wu, Xindong and Tuzhilin, Alex and Shavlik, Jude},
  pages     = {573--576},
  publisher = {IEEE Computer Society},
  year      = {2003},
  month     = nov,
  interhash = {0d7be00e85fa41a082bab454c0665126},
  intrahash = {a2602433bd2f144216fdddd3704d487f}
}

@misc{rcclp04defining,
  author        = {Radicchi, Filippo and Castellano, Claudio and Cecconi, Federico and Loreto, Vittorio and Parisi, Domenico},
  title         = {Defining and identifying communities in networks},
  year          = {2004},
  month         = feb,
  eprint        = {cond-mat/0309488},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/cond-mat/0309488},
  abstract      = {The investigation of community structures in networks is an important issue in many domains and disciplines. This problem is relevant for social tasks (objective analysis of relationships on the web), biological inquiries (functional studies in metabolic, cellular or protein networks) or technological problems (optimization of large infrastructures). Several types of algorithm exist for revealing the community structure in networks, but a general and quantitative definition of community is still lacking, leading to an intrinsic difficulty in the interpretation of the results of the algorithms without any additional non-topological information. In this paper we face this problem by introducing two quantitative definitions of community and by showing how they are implemented in practice in the existing algorithms. In this way the algorithms for the identification of the community structure become fully self-contained. Furthermore, we propose a new local algorithm to detect communities which outperforms the existing algorithms with respect to the computational cost, keeping the same level of reliability. The new algorithm is tested on artificial and real-world graphs. In particular we show the application of the new algorithm to a network of scientific collaborations, which, for its size, can not be attacked with the usual methods. This new class of local algorithms could open the way to applications to large-scale technological and biological applications.},
  interhash     = {6ec9b00862909de405c08db1c9b43d63},
  intrahash     = {8634d935e0bf4d74a870d5c805612665}
}

@article{clauset-2004-70,
  author    = {Clauset, Aaron and Newman, M. E. J. and Moore, Cristopher},
  title     = {Finding community structure in very large networks},
  journal   = {Physical Review E},
  volume    = {70},
  pages     = {066111},
  year      = {2004},
  url       = {http://www.citebase.org/cgi-bin/citations?id=oai:arXiv.org:cond-mat/0408187},
  interhash = {2c68e3c981a00380692a3b0b661d7cfd},
  intrahash = {a35d69f1d41a6cdd0632c5e1cadb4d44}
}

@article{newman2004finding,
  author    = {Newman, M. E. J. and Girvan, M.},
  title     = {Finding and evaluating community structure in networks},
  journal   = {Physical Review E},
  volume    = {69},
  pages     = {026113},
  year      = {2004},
  doi       = {10.1103/PhysRevE.69.026113},
  url       = {http://arxiv.org/abs/cond-mat/0308217},
  abstract  = {We propose and study a set of algorithms for discovering community structure in networks -- natural divisions of network nodes into densely connected subgroups. Our algorithms all share two definitive features: first, they involve iterative removal of edges from the network to split it into communities, the edges removed being identified using one of a number of possible ``betweenness'' measures, and second, these measures are, crucially, recalculated after each removal. We also propose a measure for the strength of the community structure found by our algorithms, which gives us an objective metric for choosing the number of communities into which a network should be divided. We demonstrate that our algorithms are highly effective at discovering community structure in both computer-generated and real-world network data, and show how they can be used to shed light on the sometimes dauntingly complex structure of networked systems.},
  interhash = {b9145040e35ccb4d2a0ce18105e64ff4},
  intrahash = {5581d4204604967a209dcc712ac391af}
}

@incollection{tyler2003email,
  author    = {Tyler, Joshua R. and Wilkinson, Dennis M. and Huberman, Bernardo A.},
  title     = {Email as Spectroscopy: Automated Discovery of Community Structure within Organizations},
  booktitle = {Communities and technologies},
  pages     = {81--96},
  publisher = {Kluwer, B.V.},
  address   = {Deventer, The Netherlands},
  year      = {2003},
  url       = {http://www.citebase.org/cgi-bin/citations?id=oai:arXiv.org:cond-mat/0303264},
  abstract  = {We describe a method for the automatic identification of communities of practice from email logs within an organization. We use a betweenness centrality algorithm that can rapidly find communities within a graph representing information flows. We apply this algorithm to an email corpus of nearly one million messages collected over a two-month span, and show that the method is effective at identifying true communities, both formal and informal, within these scale-free graphs. This approach also enables the identification of leadership roles within the communities. These studies are complemented by a qualitative evaluation of the results in the field.},
  interhash = {c712e59ff99f12c42a5d3c3b0bf4c48f},
  intrahash = {b272b4797aec6d5e3a4972592af93ab2}
}

@article{gn02community,
  author    = {Girvan, Michelle and Newman, M. E. J.},
  title     = {Community structure in social and biological networks},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {99},
  number    = {12},
  pages     = {7821--7826},
  year      = {2002},
  abstract  = {A number of recent studies have focused on the statistical properties of networked systems such as social networks and the Worldwide Web. Researchers have concentrated particularly on a few properties that seem to be common to many networks: the small-world property, power-law degree distributions, and network transitivity. In this article, we highlight another property that is found in many networks, the property of community structure, in which network nodes are joined together in tightly knit groups, between which there are only looser connections. We propose a method for detecting such communities, built around the idea of using centrality indices to find community boundaries. We test our method on computer-generated and real-world graphs whose community structure is already known and find that the method detects this known structure with high sensitivity and reliability. We also apply the method to two networks whose community structure is not well known---a collaboration network and a food web---and find that it detects significant and informative community divisions in both cases.},
  interhash = {ecd7a48a37f660ab421472140168c892},
  intrahash = {8f80a8586927ea69ea915b6c32e87629}
}

@misc{citeulike:591709,
  author        = {Hastings, M. B.},
  title         = {Community Detection as an Inference Problem},
  year          = {2006},
  month         = apr,
  eprint        = {cond-mat/0604429},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/cond-mat/0604429},
  interhash     = {fe59f9ef2701365c4a7d31aee35a9f6e},
  intrahash     = {74f8f223ecf17eb48210c77364580ebf}
}

@incollection{kleinberg2006temporal,
  author    = {Kleinberg, J.},
  title     = {Temporal Dynamics of On-Line Information Streams},
  booktitle = {Data Stream Management: Processing High-Speed Data Streams},
  editor    = {Garofalakis, M. and Gehrke, J. and Rastogi, R.},
  publisher = {Springer},
  year      = {2006},
  isbn      = {3540286071},
  url       = {http://www.cs.cornell.edu/home/kleinber/stream-survey04.pdf},
  interhash = {85abe180184277c0396745c7ce050c98},
  intrahash = {9c57003d80b81eab2f66b2faf02acb27}
}

@article{griffiths2004finding,
  author    = {Griffiths, Thomas L. and Steyvers, Mark},
  title     = {Finding scientific topics},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {101},
  number    = {Suppl 1},
  pages     = {5228--5235},
  year      = {2004},
  url       = {http://www.pnas.org/cgi/content/abstract/101/suppl_1/5228},
  interhash = {387a5060792d52ea73b02dd68e52559e},
  intrahash = {cbfda2e50bd63357890b9181d8883826}
}

@article{journals/jasis/AmitayCHLS04,
  author    = {Amitay, Einat and Carmel, David and Herscovici, Michael and Lempel, Ronny and Soffer, Aya},
  title     = {Trend detection through temporal link analysis},
  journal   = {Journal of the American Society for Information Science and Technology},
  volume    = {55},
  number    = {14},
  pages     = {1270--1281},
  year      = {2004},
  doi       = {10.1002/asi.20082},
  ee        = {http://dx.doi.org/10.1002/asi.20082},
  url       = {http://dblp.uni-trier.de/db/journals/jasis/jasis55.html#AmitayCHLS04},
  interhash = {f824a4e1f323a597abbdcbadb817e7cb},
  intrahash = {ed652ec3752ada2526fc675c34eb4d3b}
}