% Bibliography: text clustering / classification with background knowledge
% (ontologies, WordNet, Wikipedia) and ontology learning from text.
%
% Conventions used in this file:
%   - One field per line; bibliographic fields first, then identifiers
%     (doi/url/isbn/issn), then tool metadata (interhash/intrahash, vgwort,
%     comment, ...), which standard styles ignore.
%   - Page ranges use an en dash (double hyphen).
%   - Months use the predefined lowercase three-letter macros, unbraced.
%   - Non-ASCII characters are written as LaTeX escapes so that the file
%     also works with classic 8-bit BibTeX.
%   - Words in titles that must keep their capitalisation are brace-protected.
%
% Known overlaps (keys are kept because documents may cite either one):
%   - hotho03ontologies and hotho_icdm03 describe the same ICDM 2003 poster
%     paper (identical interhash).
%   - phdthesisBookAho04 is the published book edition of the thesis
%     phdthesisNo32 (identical interhash).

@inproceedings{hearst1992automatic,
  author    = {Hearst, Marti A.},
  title     = {Automatic acquisition of hyponyms from large text corpora},
  booktitle = {Proceedings of the 14th conference on Computational linguistics},
  volume    = {2},
  pages     = {539--545},
  numpages  = {7},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Nantes, France},
  year      = {1992},
  doi       = {10.3115/992133.992154},
  url       = {http://dx.doi.org/10.3115/992133.992154},
  acmid     = {992154},
  abstract  = {We describe a method for the automatic acquisition of the hyponymy lexical relation from unrestricted text. Two goals motivate the approach: (i) avoidance of the need for pre-encoded knowledge and (ii) applicability across a wide range of text. We identify a set of lexico-syntactic patterns that are easily recognizable, that occur frequently and across text genre boundaries, and that indisputably indicate the lexical relation of interest. We describe a method for discovering these patterns and suggest that other lexical relations will also be acquirable in this way. A subset of the acquisition algorithm is implemented and the results are used to augment and critique the structure of a large hand-built thesaurus. Extensions and applications to areas such as information retrieval are suggested.},
  interhash = {8c1e90c6cc76625c34f20370a1af7ea2},
  intrahash = {2c49ad19ac6977bd806b6687e4dcc550},
}

@inproceedings{PuWang:2007,
  author    = {Wang, Pu and Hu, Jian and Zeng, Hua-Jun and Chen, Lijun and Chen, Zheng},
  title     = {Improving Text Classification by Using Encyclopedia Knowledge},
  booktitle = {Seventh {IEEE} International Conference on Data Mining ({ICDM} 2007)},
  pages     = {332--341},
  year      = {2007},
  doi       = {10.1109/ICDM.2007.77},
  url       = {ftp://ftp.computer.org/press/outgoing/proceedings/icdm07/Data/3018a332.pdf},
  isbn      = {978-0-7695-3018-5},
  issn      = {1550-4786},
  abstract  = {The exponential growth of text documents available on the Internet has created an urgent need for accurate, fast, and general purpose text classification algorithms. However, the "bag of words" representation used for these classification methods is often unsatisfactory as it ignores relationships between important terms that do not co-occur literally. In order to deal with this problem, we integrate background knowledge - in our application: Wikipedia - into the process of classifying text documents. The experimental evaluation on Reuters newsfeeds and several other corpus shows that our classification results with encyclopedia knowledge are much better than the baseline "bag of words " methods.},
  interhash = {8a899b60047e20e162fc12b2ff6f8142},
  intrahash = {66058efbca5abd1222f72c32365d23fa},
}

@inproceedings{658040,
  author    = {Hotho, Andreas and Maedche, Alexander and Staab, Steffen},
  title     = {Text Clustering Based on Good Aggregations},
  booktitle = {ICDM '01: Proceedings of the 2001 IEEE International Conference on Data Mining},
  pages     = {607--608},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  year      = {2001},
  url       = {http://portal.acm.org/citation.cfm?id=658040},
  isbn      = {0-7695-1119-8},
  interhash = {e2f356aeefc84fd73c9bcdc08392edf0},
  intrahash = {a6803e87c5145d5f55d7bb1bab8dfd67},
}

@inproceedings{hotho_fgml02,
  author    = {Hotho, A. and Stumme, G.},
  title     = {Conceptual Clustering of Text Clusters},
  booktitle = {Proceedings of FGML Workshop},
  pages     = {37--45},
  publisher = {Special Interest Group of German Informatics Society (FGML --- Fachgruppe Maschinelles Lernen der GI e.V.)},
  year      = {2002},
  url       = {http://www.aifb.uni-karlsruhe.de/WBS/aho/pub/tc_fca_2002_submit.pdf},
  interhash = {3dd3d4ce38d0de0ba8e167f8133cbb3e},
  intrahash = {18fdbebb76d48feccf2dceed23f4cd74},
}

@inproceedings{cim04a,
  author    = {Cimiano, Philipp and Hotho, Andreas and Staab, Steffen},
  title     = {Clustering Ontologies from Text},
  booktitle = {Proceedings of the Conference on Language Resources and Evaluation (LREC)},
  publisher = {ELRA - European Language Resources Association},
  address   = {Lisbon, Portugal},
  month     = may,
  year      = {2004},
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/2004/lrec04.pdf},
  interhash = {9374d126c328dab48f52854f73d6db4f},
  intrahash = {3bc6e5a51dba862da1b7b3b6ac563370},
}

@inproceedings{lauser03,
  author    = {Lauser, Boris and Hotho, Andreas},
  title     = {Automatic multi-label subject indexing in a multilingual environment},
  booktitle = {Proc. of the 7th European Conference in Research and Advanced Technology for Digital Libraries, ECDL 2003},
  series    = {LNCS},
  volume    = {2769},
  pages     = {140--151},
  publisher = {Springer},
  year      = {2003},
  interhash = {feb38928054a3691f83122b0172c5116},
  intrahash = {8b298c325c6ecdb9c01e01057464ae2d},
}

@inproceedings{conf/iis/StaabH03,
  author    = {Staab, Steffen and Hotho, Andreas},
  title     = {Ontology-based Text Document Clustering},
  booktitle = {Intelligent Information Processing and Web Mining, Proceedings of the International IIS: IIPWM'03 Conference held in Zakopane},
  pages     = {451--452},
  year      = {2003},
  url       = {http://dblp.uni-trier.de/db/conf/iis/iis2003.html#StaabH03},
  isbn      = {3-540-00843-8},
  interhash = {dcb3c9710a44a43f9d8b17c5fc2b0f8c},
  intrahash = {d773061117a913428968cc99c6e1ec0f},
}

% The former "date" field held 2007-07-26, which contradicts year 2005 and
% would override it under biblatex; it is preserved as the ignored field
% "timestamp" (presumably a record-modification date -- confirm).
@article{cimiano05learning,
  author    = {Cimiano, Philipp and Hotho, Andreas and Staab, Steffen},
  title     = {Learning Concept Hierarchies from Text Corpora using Formal Concept Analysis},
  journal   = {Journal of Artificial Intelligence Research},
  volume    = {24},
  pages     = {305--339},
  year      = {2005},
  url       = {http://dblp.uni-trier.de/db/journals/jair/jair24.html#CimianoHS05},
  ee        = {http://www.jair.org/papers/paper1648.html},
  timestamp = {2007-07-26},
  interhash = {4c09568cff62babd362aab03095f4589},
  intrahash = {eaaf0e4b3a8b29fab23b6c15ce2d308d},
}

% Same work as hotho_icdm03 (identical interhash). The conference days
% (November 19-22) are kept in "eventdate"; "Poster" moved from the pages
% field into "note".
@inproceedings{hotho03ontologies,
  author    = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
  title     = {Ontologies improve text document clustering},
  booktitle = {Proceedings of the 2003 IEEE International Conference on Data Mining},
  pages     = {541--544},
  publisher = {{IEEE} Computer Society},
  address   = {Melbourne, Florida},
  month     = nov,
  year      = {2003},
  eventdate = {2003-11-19/2003-11-22},
  note      = {Poster},
  url       = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003ontologies.pdf},
  comment   = {alpha},
  interhash = {b56c36d6d9c9ca9e6bd236a0f92415a5},
  intrahash = {57a39c81cff1982dbefed529be934bee},
}

% NOTE(review): the editor list was garbled in the export (two names fused
% into one); it has been split into Blockeel and Todorovski -- confirm the
% order against the published volume.
@inproceedings{hotho03explaining,
  author    = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
  title     = {Explaining Text Clustering Results using Semantic Structures},
  booktitle = {Knowledge Discovery in Databases: PKDD 2003, 7th European Conference on Principles and Practice of Knowledge Discovery in Databases},
  editor    = {Lavra{\v{c}}, Nada and Gamberger, Dragan and Blockeel, Hendrik and Todorovski, Ljupco},
  series    = {LNAI},
  volume    = {2838},
  pages     = {217--228},
  publisher = {Springer},
  address   = {Heidelberg},
  year      = {2003},
  url       = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003explaining.pdf},
  abstract  = {Common text clustering techniques offer rather poor capabilities for explaining to their users why a particular result has been achieved. They have the disadvantage that they do not relate semantically nearby terms and that they cannot explain how resulting clusters are related to each other. In this paper, we discuss a way of integrating a large thesaurus and the computation of lattices of resulting clusters into common text clustering in order to overcome these two problems. As its major result, our approach achieves an explanation using an appropriate level of granularity at the concept level as well as an appropriate size and complexity of the explaining lattice of resulting clusters.},
  comment   = {alpha},
  interhash = {cf66183151a5d94a0941ac6d5089ae89},
  intrahash = {53a943b6be4b34cf4e5329d0b58e99f6},
}

@inproceedings{bloehdorn2004icdm,
  author    = {Bloehdorn, Stephan and Hotho, Andreas},
  title     = {Text Classification by Boosting Weak Learners based on Terms and Concepts},
  booktitle = {Proceedings of the Fourth IEEE International Conference on Data Mining},
  pages     = {331--334},
  publisher = {IEEE Computer Society Press},
  month     = nov,
  year      = {2004},
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/2004/icdm04boosting.pdf},
  interhash = {f18089d50fdc9c9e38c4fc1e350bdd4e},
  intrahash = {7df6357c79445d811f4a9223e688da14},
}

% NOTE(review): the publisher field name was misspelled and therefore never
% printed; now that it is active, confirm that Springer is correct for this
% workshop.
@inproceedings{hotho-ijcaiws2001,
  author    = {Hotho, Andreas and Maedche, Alexander and Staab, Steffen},
  title     = {Ontology-based Text Clustering},
  booktitle = {Proc. of the Workshop ``Text Learning: Beyond Supervision'' at IJCAI 2001. Seattle, WA, USA, August 6, 2001},
  publisher = {Springer},
  year      = {2001},
  interhash = {cce452c6e28cb3cfb99b3416457f1b24},
  intrahash = {15a8cfc9a49b70812a55c76e597db7a5},
}

% Book edition of the thesis phdthesisNo32 (identical interhash).
@book{phdthesisBookAho04,
  author    = {Hotho, Andreas},
  title     = {Clustern mit {Hintergrundwissen}},
  series    = {Diski},
  volume    = {286},
  publisher = {Akademische Verlagsgesellschaft Aka GmbH},
  address   = {Berlin},
  year      = {2004},
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/2004/dissAho.pdf},
  isbn      = {3-89838-286-9},
  interhash = {174d464d8c6c38b690ab8aa76cd3fe5f},
  intrahash = {fc7f40f7b7c8e3f72acb881b6d2d2680},
}

@inproceedings{bloehdorn04msw,
  author    = {Bloehdorn, Stephan and Hotho, Andreas},
  title     = {Boosting for Text Classification with Semantic Features},
  booktitle = {Proceedings of the MSW 2004 workshop at the 10th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
  pages     = {70--87},
  month     = aug,
  year      = {2004},
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/2004/msw04bloehdorn.pdf},
  interhash = {9b4b685dda669fc66659f810ceb97890},
  intrahash = {584e82bad6eb767636ce3ddd8f1ae233},
}

@article{hotho02ki,
  author    = {Hotho, Andreas and Maedche, Alexander and Staab, Steffen},
  title     = {Text Clustering Based on Good Aggregations},
  journal   = {K{\"{u}}nstliche Intelligenz (KI)},
  volume    = {16},
  number    = {4},
  pages     = {48--54},
  year      = {2002},
  url       = {http://www.aifb.uni-karlsruhe.de/WBS/aho/pub/Ontology_based_Text_Document_Clustering_2002.pdf},
  interhash = {2b242ffbfb47a6b3a01584703ee31b9e},
  intrahash = {b54e211028a75303d03dfdcce479428e},
}

@article{bloehdorn-etal-ldv-2005,
  author    = {Bloehdorn, Stephan and Cimiano, Philipp and Hotho, Andreas and Staab, Steffen},
  title     = {An Ontology-based Framework for Text Mining},
  journal   = {LDV Forum - GLDV Journal for Computational Linguistics and Language Technology},
  volume    = {20},
  number    = {1},
  pages     = {87--112},
  month     = may,
  year      = {2005},
  issn      = {0175-1336},
  vgwort    = {26},
  interhash = {174605ce734930cbdffb624175e845f9},
  intrahash = {0abe60273dec207b626c41f431b16b49},
}

@article{hotho-etal-ldv-2005,
  author    = {Hotho, Andreas and N{\"u}rnberger, Andreas and Paa{\ss}, Gerhard},
  title     = {A Brief Survey of Text Mining},
  journal   = {LDV Forum - GLDV Journal for Computational Linguistics and Language Technology},
  volume    = {20},
  number    = {1},
  pages     = {19--62},
  month     = may,
  year      = {2005},
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/2005/hotho05TextMining.pdf},
  issn      = {0175-1336},
  vgwort    = {44},
  interhash = {a324706344ddfce8a288870adeef18cb},
  intrahash = {6ecc8a3cee1a99bbb9f8f8dd6a9d2959},
}

@inproceedings{hotho_sigir03,
  author    = {Hotho, A. and Staab, S. and Stumme, G.},
  title     = {{WordNet} improves text document clustering},
  booktitle = {Proc. of the SIGIR 2003 Semantic Web Workshop},
  address   = {Toronto, Canada},
  year      = {2003},
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/hothoetal_sigir_ws_sem_web.pdf},
  interhash = {c2a9a89ce20cef90a1e78d34dc2c2afe},
  intrahash = {b03e58ecb17c09f8c09d1fd93fb24f90},
}

% Same work as hotho03ontologies (identical interhash).
@inproceedings{hotho_icdm03,
  author    = {Hotho, A. and Staab, S. and Stumme, G.},
  title     = {Ontologies Improve Text Document Clustering},
  booktitle = {Proc. of the ICDM 03, The 2003 IEEE International Conference on Data Mining},
  pages     = {541--544},
  year      = {2003},
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/hothoa_icdm_poster03.pdf},
  interhash = {b56c36d6d9c9ca9e6bd236a0f92415a5},
  intrahash = {8ce56ab228d021b2d7a37bc302bb9a0a},
}

% Thesis version of phdthesisBookAho04 (identical interhash).
@phdthesis{phdthesisNo32,
  author    = {Hotho, Andreas},
  title     = {Clustern mit {Hintergrundwissen}},
  school    = {University of Karlsruhe},
  address   = {Universit{\"{a}}t Karlsruhe (TH), Institut AIFB, D-76128 Karlsruhe},
  year      = {2004},
  note      = {Studer/Gaul},
  interhash = {174d464d8c6c38b690ab8aa76cd3fe5f},
  intrahash = {5bb717f4da468b42a959b0f34e340c83},
}