% Heymann, Paepcke, Garcia-Molina (WSDM 2010): tests the tagging approach to organizing a collection, using library data.
@inproceedings{heymann2010tagging,
  abstract = {A fundamental premise of tagging systems is that regular users can organize large collections for browsing and other tasks using uncontrolled vocabularies. Until now, that premise has remained relatively unexamined. Using library data, we test the tagging approach to organizing a collection. We find that tagging systems have three major large scale organizational features: consistency, quality, and completeness. In addition to testing these features, we present results suggesting that users produce tags similar to the topics designed by experts, that paid tagging can effectively supplement tags in a tagging system, and that information integration may be possible across tagging systems.},
  author = {Heymann, Paul and Paepcke, Andreas and Garcia-Molina, Hector},
  booktitle = {WSDM},
  crossref = {conf/wsdm/2010},
  date = {2010-02-18},
  doi = {10.1145/1718487.1718495},
  editor = {Davison, Brian D. and Suel, Torsten and Craswell, Nick and Liu, Bing},
  ee = {http://doi.acm.org/10.1145/1718487.1718495},
  file = {:heyman2010tagging.pdf:PDF},
  groups = {public},
  interhash = {d4f72ed57e6b99dbe32e18e218d81ef5},
  intrahash = {12579231cd5449f9a40cba9924975f09},
  isbn = {978-1-60558-889-6},
  pages = {51--60},
  publisher = {ACM},
  timestamp = {2010-04-08 07:27:02},
  title = {Tagging human knowledge},
  url = {http://dblp.uni-trier.de/db/conf/wsdm/wsdm2010.html#HeymannPG10},
  username = {dbenz},
  year = 2010
}

% Ramage, Heymann, Manning, Garcia-Molina (WSDM '09): uses social-bookmarking tags alongside page text and anchor text to cluster web pages.
@inproceedings{ramage2009clustering,
  abstract = {Automatically clustering web pages into semantic groups promises improved search and browsing on the web. In this paper, we demonstrate how user-generated tags from largescale social bookmarking websites such as del.icio.us can be used as a complementary data source to page text and anchor text for improving automatic clustering of web pages. This paper explores the use of tags in 1) K-means clustering in an extended vector space model that includes tags as well as page text and 2) a novel generative clustering algorithm based on latent Dirichlet allocation that jointly models text and tags. We evaluate the models by comparing their output to an established web directory. We find that the naive inclusion of tagging data improves cluster quality versus page text alone, but a more principled inclusion can substantially improve the quality of all models with a statistically significant absolute F-score increase of 4%. The generative model outperforms K-means with another 8% F-score increase.},
  address = {New York, NY, USA},
  author = {Ramage, Daniel and Heymann, Paul and Manning, Christopher D. and Garcia-Molina, Hector},
  booktitle = {WSDM '09: Proceedings of the Second ACM International Conference on Web Search and Data Mining},
  doi = {10.1145/1498759.1498809},
  file = {ramage2009clustering.pdf:ramage2009clustering.pdf:PDF},
  groups = {public},
  interhash = {5595f06f88310ed67fd6fe23f813c69b},
  intrahash = {75c4bad29d7eb4b34f68da27f0353516},
  isbn = {978-1-60558-390-7},
  pages = {54--63},
  publisher = {ACM},
  timestamp = {2009-04-24 10:19:45},
  title = {Clustering the tagged web},
  url = {http://portal.acm.org/citation.cfm?id=1498809},
  username = {dbenz},
  venue = {Barcelona, Spain},
  year = 2009
}