% Entry chakrabarti2000mining: bookmark-theme mining paper (year 2000).
% The abstract had PDF line-break hyphenation artifacts ("tax- onomy" etc.) which are repaired below.
% NOTE(review): the second author's surname may be misspelled (doubled "r") -- confirm against the paper.
% NOTE(review): the url field holds a bare file name, not a resolvable URL -- confirm the intended location.
@misc{chakrabarti2000mining,
  author           = {Chakrabarti, S. and Batterrywala, Y.},
  title            = {Mining themes from bookmarks},
  year             = 2000,
  url              = {chakrabarti00.ps},
  abstract         = {Many Web surfers maintain bookmark files containing URLs organized into folders which represent topics by function or content. We propose a new framework for discovering themes among such folders created by a community of surfers with overlapping interests. In principle, surfers need not create personal folders; they can just use a 'standard' taxonomy such as Yahoo. In practice, a standard taxonomy is neither necessary nor sufficient; it is too detailed in most part, but likely to be inadequate in the community's areas of deep interest. Our framework, based on the minimum description length principle, exploits two semantically different relations: "term occurs in document" and "document occurs in folder", and combines them without the need for heuristic weighting of attributes. Themes, in effect, finds common factors in people's interests when it can, while maintaining their individuality when it must. We present experimental results to illustrate our framework and show how it is different from content-based clustering. We also propose a number of applications to finding themes, including collaborative recommendation and resource discovery.},
  file             = {chakrabarti2000mining.pdf:chakrabarti2000mining.pdf:PDF},
  pdf              = {chakrabarti00.pdf},
  interhash        = {dd726da3987cfe89f9654fd2a90da2d8},
  intrahash        = {7c884093f2232d32cf226d774f93269a},
  lastdatemodified = {2005-08-06},
  lastname         = {Chakrabarti},
  own              = {own},
  read             = {read}
}