P 
Henderson, K. & Eliassi-Rad, T.
(2009):
Applying latent dirichlet allocation to group discovery in large graphs.
In: SAC '09: Proceedings of the 2009 ACM symposium on Applied Computing,
New York, NY, USA.
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
This paper introduces LDAG, a scalable Bayesian approach to finding latent group structures in large real-world graph data. Existing Bayesian approaches for group discovery (such as Infinite Relational Models) have only been applied to small graphs with a couple of hundred nodes. LDAG (short for Latent Dirichlet Allocation for Graphs) utilizes a well-known topic modeling algorithm to find latent group structure. Specifically, we modify Latent Dirichlet Allocation (LDA) to operate on graph data instead of text corpora. Our modifications reflect the differences between real-world graph data and text corpora (e.g., a node's neighbor count vs. a document's word count). In our empirical study, we apply LDAG to several large graphs (with thousands of nodes) from PubMed (a scientific publication repository). We compare LDAG's quantitative performance on link prediction with two existing approaches: one Bayesian (namely, Infinite Relational Model) and one non-Bayesian (namely, Cross-association). On average, LDAG outperforms IRM by 15% and Cross-association by 25% (in terms of area under the ROC curve). Furthermore, we demonstrate that LDAG can discover useful qualitative information.
% Conference paper; the numeric citation key matches the id in the url field below.
@inproceedings{1529607,
  author    = {Henderson, Keith and Eliassi-Rad, Tina},
  title     = {Applying Latent {Dirichlet} Allocation to Group Discovery in Large Graphs},
  booktitle = {{SAC} '09: Proceedings of the 2009 {ACM} Symposium on Applied Computing},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2009},
  pages     = {1456--1461},
  isbn      = {978-1-60558-166-8},
  doi       = {10.1145/1529282.1529607},
  url       = {http://portal.acm.org/citation.cfm?id=1529607},
  keywords  = {community, detection, lda},
  abstract  = {This paper introduces LDAG, a scalable Bayesian approach to finding latent group structures in large real-world graph data. Existing Bayesian approaches for group discovery (such as Infinite Relational Models) have only been applied to small graphs with a couple of hundred nodes. LDAG (short for Latent Dirichlet Allocation for Graphs) utilizes a well-known topic modeling algorithm to find latent group structure. Specifically, we modify Latent Dirichlet Allocation (LDA) to operate on graph data instead of text corpora. Our modifications reflect the differences between real-world graph data and text corpora (e.g., a node's neighbor count vs. a document's word count). In our empirical study, we apply LDAG to several large graphs (with thousands of nodes) from PubMed (a scientific publication repository). We compare LDAG's quantitative performance on link prediction with two existing approaches: one Bayesian (namely, Infinite Relational Model) and one non-Bayesian (namely, Cross-association). On average, LDAG outperforms IRM by 15\% and Cross-association by 25\% (in terms of area under the ROC curve). Furthermore, we demonstrate that LDAG can discover useful qualitative information.},
}
%0 = inproceedings
%A = Henderson, Keith and Eliassi-Rad, Tina
%B = SAC '09: Proceedings of the 2009 ACM symposium on Applied Computing
%C = New York, NY, USA
%D = 2009
%I = ACM
%T = Applying latent dirichlet allocation to group discovery in large graphs
%U = http://portal.acm.org/citation.cfm?id=1529607
