% Bibliography records for work by S. Chakrabarti on mining the Web and hypertext.
% Text outside an entry is ignored by BibTeX, so these lines act as comments.
% Non-standard fields (interhash, intrahash, own, read, reviewnote, ...) are
% kept as data; standard styles silently ignore fields they do not know.

% Paper on discovering themes in shared bookmark folders (includes abstract).
@misc{chakrabarti2000mining,
  author           = {Chakrabarti, S. and Batterywala, Y.},
  title            = {Mining themes from bookmarks},
  year             = 2000,
  abstract         = {Many Web surfers maintain bookmark files containing URLs
                      organized into folders which represent topics by function
                      or content. We propose a new framework for discovering
                      themes among such folders created by a community of
                      surfers with overlapping interests. In principle, surfers
                      need not create personal folders; they can just use a
                      'standard' taxonomy such as Yahoo. In practice, a standard
                      taxonomy is neither necessary nor sufficient; it is too
                      detailed in most part, but likely to be inadequate in the
                      community's areas of deep interest. Our framework, based
                      on the minimum description length principle, exploits two
                      semantically different relations: "term occurs in
                      document" and "document occurs in folder", and combines
                      them without the need for heuristic weighting of
                      attributes. Themes, in effect, finds common factors in
                      people's interests when it can, while maintaining their
                      individuality when it must. We present experimental
                      results to illustrate our framework and show how it is
                      different from content-based clustering. We also propose a
                      number of applications to finding themes, including
                      collaborative recommendation and resource discovery.},
  url              = {chakrabarti00.ps},
  pdf              = {chakrabarti00.pdf},
  file             = {chakrabarti2000mining.pdf:chakrabarti2000mining.pdf:PDF},
  interhash        = {dd726da3987cfe89f9654fd2a90da2d8},
  intrahash        = {7c884093f2232d32cf226d774f93269a},
  lastdatemodified = {2005-08-06},
  lastname         = {Chakrabarti},
  own              = {own},
  read             = {read},
}

% Journal article on focused crawling.
@article{Chakrabartietal99,
  author    = {Chakrabarti, S. and van den Berg, M. and Dom, B.},
  title     = {Focused Crawling: A New Approach to Topic-Specific {Web} Resource Discovery},
  journal   = {Computer Networks},
  volume    = 31,
  pages     = {1623--1640},
  year      = 1999,
  isbn      = {90-74821-43-X},
  url       = {http://citeseer.nj.nec.com/chakrabarti99focused.html},
  interhash = {e35ac8e9c02ab2a5075b9c1692ac7a2d},
  intrahash = {004dd97a2b2e71fa2cfe6820c74c9701},
}

% WWW7 conference paper. The volume/issue designator that used to trail the
% booktitle now lives in series/volume/issue.
@inproceedings{Chakrabartietal98,
  author     = {Chakrabarti, S. and Dom, B. and Gibson, D. and Kleinberg, J. and Raghavan, P. and Rajagopalan, S.},
  title      = {Automatic resource compilation by analyzing hyperlink structure and associated text},
  booktitle  = {Proceedings of the 7th World-Wide Web Conference (WWW7)},
  series     = {Computer Networks and ISDN Systems},
  volume     = 30,
  issue      = {1--7},
  pages      = {65--74},
  year       = 1998,
  location   = {Santa Barbara, CA},
  url        = {http://citeseer.nj.nec.com/chakrabarti98automatic.html},
  interhash  = {911034f92829572cb2c211ad26849bda},
  intrahash  = {9333fd6bd5864b94c9644e979c7a9fec},
  reviewnote = {NOTE(review): location looks wrong for WWW7 (believed to have been held in Brisbane, Australia) and the same value appears on Chakrabarti2000 -- TODO confirm and correct},
}

% Book published by Morgan Kaufmann.
@book{Chakrabarti03,
  author     = {Chakrabarti, S.},
  title      = {Mining the {Web}},
  publisher  = {Morgan Kaufmann},
  address    = {San Francisco, CA},
  year       = 2003,
  isbn       = {90-74821-43-X},
  interhash  = {17b9fcd01a172b10b3bd238457902ead},
  intrahash  = {70fcada9c1a5537b5394a49c326a7110},
  reviewnote = {NOTE(review): isbn is identical to the one recorded on Chakrabartietal99 and is unlikely to belong to both works -- TODO verify against the publisher record},
}

% Tutorial survey article in SIGKDD Explorations.
@article{Chakrabarti2000,
  author     = {Chakrabarti, S.},
  title      = {Data mining for hypertext: A tutorial survey},
  journal    = {SIGKDD Explorations},
  volume     = 1,
  number     = 2,
  pages      = {1--11},
  year       = 2000,
  publisher  = {ACM},
  location   = {Santa Barbara, CA},
  url        = {http://citeseer.nj.nec.com/304115.html},
  interhash  = {e54787477738b367fad5aa57523a2cfd},
  intrahash  = {31d8cbe4de9c172bf3284a29b0cf57bf},
  reviewnote = {NOTE(review): location on a journal article is unusual and matches the value on Chakrabartietal98 -- presumably copied; TODO confirm},
}