@article{mitzenmacher2004history,
  author    = {Mitzenmacher, M.},
  title     = {A Brief History of Generative Models for Power Law and Lognormal Distributions},
  journal   = {Internet Mathematics},
  volume    = {1},
  number    = {2},
  pages     = {226--251},
  year      = {2004},
  url       = {http://www.eecs.harvard.edu/~michaelm/CS223/powerlaw.pdf},
  abstract  = {Recently, I became interested in a current debate over whether file size distributions are best modelled by a power law distribution or a lognormal distribution. In trying to learn enough about these distributions to settle the question, I found a rich and long history, spanning many fields. Indeed, several recently proposed models from the computer science community have antecedents in work from decades ago. Here, I briefly survey some of this history, focusing on underlying generative models that lead to these distributions. One finding is that lognormal and power law distributions connect quite naturally, and hence, it is not surprising that lognormal distributions have arisen as a possible alternative to power law distributions across many fields.},
  interhash = {50b0caa36c6cbc1ecfa0714157f06bd1},
  intrahash = {acdeb6b7980b25477665939c191f1e40},
}

@comment{Duplicate copy of mitzenmacher2004history (identical key and fields)
  removed: repeated entries under one key are an error in BibTeX and Biber.}

@inproceedings{dellschaft2008epistemic,
  author    = {Dellschaft, Klaas and Staab, Steffen},
  title     = {An Epistemic Dynamic Model for Tagging Systems},
  booktitle = {Proceedings of the Nineteenth {ACM} Conference on Hypertext and Hypermedia},
  series    = {HT '08},
  pages     = {71--80},
  numpages  = {10},
  year      = {2008},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Pittsburgh, PA, USA},
  isbn      = {978-1-59593-985-2},
  doi       = {10.1145/1379092.1379109},
  url       = {http://doi.acm.org/10.1145/1379092.1379109},
  acmid     = {1379109},
  abstract  = {In recent literature, several models were proposed for reproducing and understanding the tagging behavior of users. They all assume that the tagging behavior is influenced by the previous tag assignments of other users. But they are only partially successful in reproducing characteristic properties found in tag streams. We argue that this inadequacy of existing models results from their inability to include user's background knowledge into their model of tagging behavior. This paper presents a generative tagging model that integrates both components, the background knowledge and the influence of previous tag assignments. Our model successfully reproduces characteristic properties of tag streams. It even explains effects of the user interface on the tag stream.},
  interhash = {cc0d1d4f43effbb6eb7d463422e6c00b},
  intrahash = {7877bf1d91bd35067461c306b7f6fd00},
}

@article{loulwah2009topic,
  author    = {AlSumait, Loulwah and Barbar{\'a}, Daniel and Gentle, James and Domeniconi, Carlotta},
  title     = {Topic Significance Ranking of {LDA} Generative Models},
  journal   = {Machine Learning and Knowledge Discovery in Databases},
  pages     = {67--82},
  year      = {2009},
  url       = {http://dx.doi.org/10.1007/978-3-642-04180-8_22},
  abstract  = {Topic models, like Latent Dirichlet Allocation (LDA), have been recently used to automatically generate text corpora topics, and to subdivide the corpus words among those topics. However, not all the estimated topics are of equal importance or correspond to genuine themes of the domain. Some of the topics can be a collection of irrelevant words, or represent insignificant themes. Current approaches to topic modeling perform manual examination to find meaningful topics. This paper presents the first automated unsupervised analysis of LDA models to identify junk topics from legitimate ones, and to rank the topic significance. Basically, the distance between a topic distribution and three definitions of ``junk distribution'' is computed using a variety of measures, from which an expressive figure of the topic significance is implemented using 4-phase Weighted Combination approach. Our experiments on synthetic and benchmark datasets show the effectiveness of the proposed approach in ranking the topic significance.},
  interhash = {273b61715108282ac89350ba18f99eb2},
  intrahash = {6310cb442c4e7852070e4f631fa2c1fa},
  internal-note = {NOTE(review): the venue (the DOI prefix 10.1007/978-3-642-04180-8 is a Springer LNCS volume) suggests this is a conference/collection paper, not a journal article; consider @inproceedings or @incollection with booktitle/series -- verify against the publisher record.},
}