@misc{Vazquez2008,
  abstract = {Data clustering, including problems such as finding network communities, can be put into a systematic framework by means of a Bayesian approach. The application of Bayesian approaches to real problems can be, however, quite challenging. In most cases the solution is explored via Monte Carlo sampling or variational methods. Here we work further on the application of variational methods to clustering problems. We introduce generative models based on a hidden group structure and prior distributions. We extend previous attempts by Jaynes, and derive the prior distributions based on symmetry arguments. As a case study we address the problems of two-sided clustering of real-valued data and clustering data represented by a hypergraph or bipartite graph. From the variational calculations, and depending on the starting statistical model for the data, we derive a variational Bayes algorithm, a generalized version of the expectation maximization algorithm with a built-in penalization for model complexity or bias. We demonstrate the good performance of the variational Bayes algorithm using test examples.},
  author = {Vazquez, Alexei},
  interhash = {ee1f9455db7046612d0baf0360e0f428},
  intrahash = {887ae82953a03602e0a135d303950b80},
  note = {cite arxiv:0805.2689 Comment: 12 pages, 5 figures. New sections added},
  title = {Bayesian approach to clustering real value, categorical and network data: solution via variational methods},
  url = {http://arxiv.org/abs/0805.2689},
  year = 2008
}

@article{heinrich2005parameter,
  author = {Heinrich, G.},
  interhash = {dbfae9d80facacc023f9a057930699ec},
  intrahash = {53342b5f95d19b270d200603d53ec18a},
  journal = {Web: http://www.arbylon.net/publications/text-est.pdf},
  title = {{Parameter estimation for text analysis}},
  url = {http://www.arbylon.net/publications/text-est.pdf},
  year = 2005
}

@article{pu2009latent,
  abstract = {Co-clustering has emerged as an important technique for mining contingency data matrices. However, almost all existing co-clustering algorithms are hard partitioning, assigning each row and column of the data matrix to one cluster. Recently a Bayesian co-clustering approach has been proposed which allows a probability distribution membership in row and column clusters. The approach uses variational inference for parameter estimation. In this work, we modify the Bayesian co-clustering model, and use collapsed Gibbs sampling and collapsed variational inference for parameter estimation. Our empirical evaluation on real data sets shows that both collapsed Gibbs sampling and collapsed variational inference are able to find more accurate likelihood estimates than the standard variational Bayesian co-clustering approach.},
  author = {Wang, Pu and Domeniconi, Carlotta and Laskey, Kathryn},
  interhash = {ca3c6ea6255fd4fa4601502fd55bec24},
  intrahash = {0ef1833cdcdf2a7d9093e37894c4f3ab},
  journal = {Machine Learning and Knowledge Discovery in Databases},
  pages = {522--537},
  title = {Latent Dirichlet Bayesian Co-Clustering},
  url = {http://dx.doi.org/10.1007/978-3-642-04174-7_34},
  year = 2009
}

@inproceedings{conf/icdm/ShanB08,
  author = {Shan, Hanhuai and Banerjee, Arindam},
  booktitle = {ICDM},
  date = {2009-02-20},
  ee = {http://dx.doi.org/10.1109/ICDM.2008.91},
  interhash = {15f1d5cfb6898d44f170ae51a1f172ef},
  intrahash = {543b31ac1f6f8b70b94976abb95e73c7},
  pages = {530--539},
  publisher = {IEEE Computer Society},
  title = {Bayesian Co-clustering},
  url = {http://dblp.uni-trier.de/db/conf/icdm/icdm2008.html#ShanB08},
  year = 2008
}

@article{journals/jmlr/BanerjeeMDG05,
  author = {Banerjee, Arindam and Merugu, Srujana and Dhillon, Inderjit S. and Ghosh, Joydeep},
  date = {2007-02-21},
  ee = {http://www.jmlr.org/papers/v6/banerjee05b.html},
  interhash = {50d46127d134382ca84699ce24171c3f},
  intrahash = {bba5d5241acf3ec9eea3f869a832c629},
  journal = {Journal of Machine Learning Research},
  pages = {1705--1749},
  title = {Clustering with Bregman Divergences},
  url = {http://dblp.uni-trier.de/db/journals/jmlr/jmlr6.html#BanerjeeMDG05},
  volume = 6,
  year = 2005
}

@article{339252,
  address = {Hingham, MA, USA},
  author = {Jordan, Michael I. and Ghahramani, Zoubin and Jaakkola, Tommi S. and Saul, Lawrence K.},
  doi = {10.1023/A:1007665907178},
  interhash = {f305ddebfd438a2575f09d72467a81c9},
  intrahash = {30a0c13528ae353d38e6c8aed9db7821},
  issn = {0885-6125},
  journal = {Machine Learning},
  number = 2,
  pages = {183--233},
  publisher = {Kluwer Academic Publishers},
  title = {An Introduction to Variational Methods for Graphical Models},
  url = {http://portal.acm.org/citation.cfm?id=339248.339252},
  volume = 37,
  year = 1999
}

@book{jordan-learning-98,
  editor = {Jordan, M.},
  interhash = {dca14c475ead34e75711dfe8bb911d96},
  intrahash = {101d8938173add30b69dd1f4872e6eb7},
  publisher = {MIT Press},
  title = {Learning in Graphical Models},
  year = 1998
}

@article{cowell1999introduction,
  author = {Cowell, R.},
  interhash = {dc28d5ed9c9188760a49b0be7f12d8ab},
  intrahash = {9619e1a67b56ce6da1f0d7d499805498},
  journal = {Learning in Graphical Models},
  pages = {9--26},
  title = {{Introduction to inference for Bayesian networks}},
  year = 1999
}

@article{cowell1998advanced,
  author = {Cowell, R.},
  interhash = {fe438e1412e694bba0969bc7f99310a6},
  intrahash = {aa27d5a4998c8c6967049cb99c5bd40e},
  journal = {Learning in Graphical Models},
  publisher = {MIT Press},
  title = {{Advanced inference in Bayesian networks}},
  year = 1998
}

@article{an1984stochastic,
  author = {Geman, Stuart and Geman, Donald},
  interhash = {49a6800007b066b9886e043eb7d25642},
  intrahash = {6aed52c1277e7cf789c06994f77fc778},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  pages = {721--741},
  title = {{Stochastic relaxation, Gibbs distributions, and the Bayesian restoration of images}},
  volume = 6,
  year = 1984
}
@inproceedings{teh_08,
  author = {Teh, Y. W. and Kurihara, K. and Welling, M.},
  booktitle = {Advances in Neural Information Processing Systems},
  interhash = {66b17a5b20bed0f5aa8c9dc71483919d},
  intrahash = {e8a404aee8a532217fc16d91debb5637},
  owner = {gregor},
  timestamp = {2008.03.29},
  title = {Collapsed Variational Inference for {HDP}},
  volume = 20,
  year = 2008
}

@article{Buntine94operationsfor,
  abstract = {This paper is a multidisciplinary review of empirical, statistical learning from a graphical model perspective. Well-known examples of graphical models include Bayesian networks, directed graphs representing a Markov chain, and undirected networks representing a Markov field. These graphical models are extended to model data analysis and empirical learning using the notation of plates. Graphical operations for simplifying and manipulating a problem are provided including decomposition, differentiation, and the manipulation of probability models from the exponential family. Two standard algorithm schemas for learning are reviewed in a graphical framework: Gibbs sampling and the expectation maximization algorithm. Using these operations and schemas, some popular algorithms can be synthesized from their graphical specification. This includes versions of linear regression, techniques for feed-forward networks, and learning Gaussian and discrete Bayesian networks from data. The paper conclu...},
  author = {Buntine, Wray L.},
  interhash = {c7dd650780467c934551356630a7b739},
  intrahash = {8952cf0d215116e038971f7c30d6d19d},
  journal = {Journal of Artificial Intelligence Research},
  pages = {159--225},
  title = {Operations for Learning with Graphical Models},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.52.696},
  volume = 2,
  year = 1994
}

@misc{murphy01,
  author = {Murphy, Kevin},
  bdsk-url-1 = {http://www.ai.mit.edu/~murphyk/Papers/intro_gm.pdf},
  howpublished = {Web},
  interhash = {d1fcd0ead413e934c318979cf4285b72},
  intrahash = {b3dfa03c046b5ea0a6790f986abc03f9},
  owner = {heinrich},
  timestamp = {2009.04.07},
  title = {An introduction to graphical models},
  url = {http://www.ai.mit.edu/~murphyk/Papers/intro_gm.pdf},
  year = 2001
}

@inproceedings{1103922,
  abstract = {Sophisticated computer graphics applications require complex models of appearance, motion, natural phenomena, and even artistic style. Such models are often difficult or impossible to design by hand. Recent research demonstrates that, instead, we can "learn" a dynamical and/or appearance model from captured data, and then synthesize realistic new data from the model. For example, we can capture the motions of a human actor and then generate new motions as they might be performed by that actor. Bayesian reasoning is a fundamental tool of machine learning and statistics, and it provides powerful tools for solving otherwise-difficult problems of learning about the world from data. Beginning from first principles, this course develops the general methodologies for designing learning algorithms and describes their application to several problems in graphics.},
  address = {New York, NY, USA},
  author = {Hertzmann, Aaron},
  booktitle = {SIGGRAPH '04: ACM SIGGRAPH 2004 Course Notes},
  doi = {10.1145/1103900.1103922},
  interhash = {261f4139fd56f371c7ff828b5f2d6df8},
  intrahash = {b3e68fb8932bb0cca51bc56a36857bf7},
  location = {Los Angeles, CA},
  pages = 22,
  publisher = {ACM},
  title = {Introduction to Bayesian learning},
  url = {http://portal.acm.org/citation.cfm?id=1103900.1103922},
  year = 2004
}

@book{jordan1998learning,
  editor = {Jordan, M.I.},
  interhash = {dca14c475ead34e75711dfe8bb911d96},
  intrahash = {9e4542bbc55ee07b8fa1c45d465b2f95},
  publisher = {Kluwer Academic Publishers},
  title = {{Learning in graphical models}},
  year = 1998
}

@article{teh2006hierarchical,
  author = {Teh, Y.W. and Jordan, M.I. and Beal, M.J. and Blei, D.M.},
  interhash = {34e30f6d1538ed136344f6a9cf8a791b},
  intrahash = {442d8c2028cc73f909b44b6d67c606b2},
  journal = {Journal of the American Statistical Association},
  number = 476,
  pages = {1566--1581},
  title = {{Hierarchical Dirichlet Processes}},
  volume = 101,
  year = 2006
}