AlSumait, L.; Barbará, D.; Gentle, J. & Domeniconi, C.
(2009):
Topic Significance Ranking of LDA Generative Models.
In: Machine Learning and Knowledge Discovery in Databases,
Erscheinungsjahr/Year: 2009.
Seiten/Pages: 67-82.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
Topic models, like Latent Dirichlet Allocation (LDA), have been recently used to automatically generate text corpora topics,
and to subdivide the corpus words among those topics. However, not all the estimated topics are of equal importance or correspond to genuine themes of the domain. Some of the topics can be a collection of irrelevant words, or represent insignificant themes. Current approaches to topic modeling perform manual examination to find meaningful topics. This paper presents the first automated unsupervised analysis of LDA models to identify junk topics from legitimate ones, and to rank the topic significance. Basically, the distance between a topic distribution and three definitions of “junk distribution” is computed using a variety of measures, from which an expressive figure of the topic significance is implemented using 4-phase Weighted Combination approach. Our experiments on synthetic and benchmark datasets show the effectiveness of the proposed approach in ranking the topic significance.
@inproceedings{loulwah2009topic,
  author    = {AlSumait, Loulwah and Barbar{\'a}, Daniel and Gentle, James and Domeniconi, Carlotta},
  title     = {Topic Significance Ranking of {LDA} Generative Models},
  booktitle = {Machine Learning and Knowledge Discovery in Databases},
  year      = {2009},
  pages     = {67--82},
  doi       = {10.1007/978-3-642-04180-8_22},
  url       = {http://dx.doi.org/10.1007/978-3-642-04180-8_22},
  keywords  = {2009, ecml, generative, lda, mining, model, pkdd, text},
  abstract  = {Topic models, like Latent Dirichlet Allocation (LDA), have been recently used to automatically generate text corpora topics, and to subdivide the corpus words among those topics. However, not all the estimated topics are of equal importance or correspond to genuine themes of the domain. Some of the topics can be a collection of irrelevant words, or represent insignificant themes. Current approaches to topic modeling perform manual examination to find meaningful topics. This paper presents the first automated unsupervised analysis of LDA models to identify junk topics from legitimate ones, and to rank the topic significance. Basically, the distance between a topic distribution and three definitions of ``junk distribution'' is computed using a variety of measures, from which an expressive figure of the topic significance is implemented using 4-phase Weighted Combination approach. Our experiments on synthetic and benchmark datasets show the effectiveness of the proposed approach in ranking the topic significance.}
}
%0 = article
%A = AlSumait, Loulwah and Barbará, Daniel and Gentle, James and Domeniconi, Carlotta
%D = 2009
%T = Topic Significance Ranking of LDA Generative Models
%U = http://dx.doi.org/10.1007/978-3-642-04180-8_22
Chebolu, P. & Melsted, P.
(2008):
PageRank and the random surfer model.
Erscheinungsjahr/Year: 2008.
Seiten/Pages: 1010-1018.
[Volltext] [BibTeX]
[Endnote]
@inproceedings{chebolu2008pagerank,
  author    = {Chebolu, P. and Melsted, P.},
  title     = {{PageRank} and the random surfer model},
  booktitle = {Proceedings of the nineteenth annual {ACM-SIAM} symposium on Discrete algorithms},
  year      = {2008},
  pages     = {1010--1018},
  url       = {http://scholar.google.de/scholar.bib?q=info:f7YaFVQIaeIJ:scholar.google.com/&output=citation&hl=de&ct=citation&cd=2},
  keywords  = {model, pagerank, random, surfer}
}
%0 = article
%A = Chebolu, P. and Melsted, P.
%B = Proceedings of the nineteenth annual ACM-SIAM symposium on Discrete algorithms
%D = 2008
%T = PageRank and the random surfer model
%U = http://scholar.google.de/scholar.bib?q=info:f7YaFVQIaeIJ:scholar.google.com/&output=citation&hl=de&ct=citation&cd=2
Pennock, D.; Flake, G.; Lawrence, S.; Glover, E. & Giles, C. L.
(2002):
Winners don't take all: Characterizing the
competition for links on the web.
In: Proc. National Academy of Sciences,
Ausgabe/Number: 8,
Vol. 99,
Erscheinungsjahr/Year: 2002.
Seiten/Pages: 5207-5211.
[BibTeX]
[Endnote]
@article{pennock2002winners,
  author   = {Pennock, David and Flake, Gary and Lawrence, Steve and Glover, Eric and Giles, C. Lee},
  title    = {Winners don't take all: Characterizing the competition for links on the web},
  journal  = {Proceedings of the National Academy of Sciences},
  year     = {2002},
  volume   = {99},
  number   = {8},
  pages    = {5207--5211},
  keywords = {community, degree, distribution, model, powerlaw, smallworld}
}
%0 = article
%A = Pennock, David and Flake, Gary and Lawrence, Steve and Glover, Eric and Giles, C. Lee
%D = 2002
%T = Winners don't take all: Characterizing the competition for links on the web
Snijders, T.
(2002):
Markov chain Monte Carlo estimation of exponential random graph models.
In: Journal of Social Structure,
Ausgabe/Number: 2,
Vol. 3,
Erscheinungsjahr/Year: 2002.
Seiten/Pages: 1-40.
[BibTeX]
[Endnote]
@article{snijders2002mcm,
  author   = {Snijders, T. A. B.},
  title    = {{Markov} chain {Monte} {Carlo} estimation of exponential random graph models},
  journal  = {Journal of Social Structure},
  year     = {2002},
  volume   = {3},
  number   = {2},
  pages    = {1--40},
  keywords = {carlo, estimation, exponential, generation, graph, model, monte, p*, parameter, sna}
}
%0 = article
%A = Snijders, T.A.B.
%D = 2002
%T = Markov chain Monte Carlo estimation of exponential random graph models
Newman, M.; Strogatz, S. & Watts, D.
(2001):
Random graphs with arbitrary degree distributions and their applications.
In: Arxiv preprint cond-mat/0007235,
Erscheinungsjahr/Year: 2001.
[BibTeX]
[Endnote]
@misc{newman2001rga,
  author        = {Newman, M. E. J. and Strogatz, S. H. and Watts, D. J.},
  title         = {Random graphs with arbitrary degree distributions and their applications},
  year          = {2001},
  eprint        = {cond-mat/0007235},
  archiveprefix = {arXiv},
  note          = {arXiv preprint cond-mat/0007235},
  keywords      = {configuration, degree, distribution, function, generating, graph, model, random}
}
%0 = article
%A = Newman, MEJ and Strogatz, SH and Watts, DJ
%D = 2001
%T = Random graphs with arbitrary degree distributions and their applications
Anderson, C.; Wasserman, S. & Crouch, B.
(1999):
A p* primer: Logit models for social networks.
In: Social Networks,
Ausgabe/Number: 1,
Vol. 21,
Verlag/Publisher: Elsevier.
Erscheinungsjahr/Year: 1999.
Seiten/Pages: 37-66.
[BibTeX]
[Endnote]
@article{anderson1999ppl,
  author    = {Anderson, C. J. and Wasserman, S. and Crouch, B.},
  title     = {A {p*} primer: Logit models for social networks},
  journal   = {Social Networks},
  publisher = {Elsevier},
  year      = {1999},
  volume    = {21},
  number    = {1},
  pages     = {37--66},
  keywords  = {carlo, exponential, generation, graph, model, monte, random, simulation, sna}
}
%0 = article
%A = Anderson, C.J. and Wasserman, S. and Crouch, B.
%D = 1999
%I = Elsevier
%T = A p* primer: Logit models for social networks
(1998):
Learning in Graphical Models.
Erscheinungsjahr/Year: 1998.
Verlag/Publisher: MIT Press,
[BibTeX]
[Endnote]
@book{jordan-learning-98,
  title     = {Learning in Graphical Models},
  editor    = {Jordan, M.},
  publisher = {MIT Press},
  year      = {1998},
  keywords  = {bayesian, graphical, learning, ml, model}
}
%0 = book
%D = 1998
%I = MIT Press
%T = Learning in Graphical Models
Molloy, M. & Reed, B.
(1995):
A critical point for random graphs with a given degree sequence.
[Volltext] [BibTeX]
[Endnote]
@article{molloy_reed95,
  author   = {Molloy, M. and Reed, B.},
  title    = {A critical point for random graphs with a given degree sequence},
  journal  = {Random Structures \& Algorithms},
  year     = {1995},
  volume   = {6},
  pages    = {161--179},
  url      = {http://citeseer.ist.psu.edu/molloy95critical.html},
  keywords = {component, configuration, giant, graph, model, random, theory}
}
%0 = misc
%A = Molloy, M. and Reed, B.
%B =
%C =
%D = 1995
%I =
%T = A critical point for random graphs with a given degree sequence
%U = /brokenurl#citeseer.ist.psu.edu/molloy95critical.html