% Bibliography database: references on information extraction from text,
% text categorization, and text document clustering with background knowledge.
%
% Conventions used in this file:
%   * one field per line, values delimited by braces;
%   * page ranges use an en dash (double hyphen);
%   * month uses the predefined three-letter macros, unbraced;
%   * accented letters are written as brace-wrapped special characters so
%     that classic 8-bit BibTeX sorts and labels them correctly;
%   * acmid, numpages, interhash, intrahash and comment are export metadata
%     that standard styles ignore; they are retained unchanged.

% Conference paper; address is the publisher city, location is the venue.
@inproceedings{hearst1992automatic,
  author    = {Hearst, Marti A.},
  title     = {Automatic acquisition of hyponyms from large text corpora},
  booktitle = {Proceedings of the 14th conference on Computational linguistics},
  volume    = {2},
  pages     = {539--545},
  numpages  = {7},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Nantes, France},
  year      = {1992},
  doi       = {10.3115/992133.992154},
  url       = {http://dx.doi.org/10.3115/992133.992154},
  acmid     = {992154},
  abstract  = {We describe a method for the automatic acquisition of the hyponymy lexical relation from unrestricted text. Two goals motivate the approach: (i) avoidance of the need for pre-encoded knowledge and (ii) applicability across a wide range of text. We identify a set of lexico-syntactic patterns that are easily recognizable, that occur frequently and across text genre boundaries, and that indisputably indicate the lexical relation of interest. We describe a method for discovering these patterns and suggest that other lexical relations will also be acquirable in this way. A subset of the acquisition algorithm is implemented and the results are used to augment and critique the structure of a large hand-built thesaurus. Extensions and applications to areas such as information retrieval are suggested.},
  interhash = {8c1e90c6cc76625c34f20370a1af7ea2},
  intrahash = {2c49ad19ac6977bd806b6687e4dcc550},
}

% Conference paper. Author given names are stored as initials, as exported.
@inproceedings{martins2008extracting,
  author    = {Martins, B. and Manguinhas, H. and Borbinha, J.},
  title     = {Extracting and Exploring the Geo-Temporal Semantics of Textual Resources},
  booktitle = {Proceedings of the International Conference on Semantic Computing},
  pages     = {1--9},
  publisher = {IEEE Computer Society},
  month     = aug,
  year      = {2008},
  doi       = {10.1109/ICSC.2008.86},
  url       = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=4597167},
  abstract  = {Geo-temporal criteria are important for filtering, grouping and prioritizing information resources. This presents techniques for extracting semantic geo-temporal information from text, using simple text mining methods that leverage on a gazetteer. A prototype system, implementing the proposed methods and capable of displaying information over maps and timelines, is described. This prototype can take input in RSS, demonstrating the application to content from many different online sources. Experimental results demonstrate the efficiency and accuracy of the proposed approaches.},
  interhash = {d03fecb6b3261ffa0a5e11789b188883},
  intrahash = {5a889bc7d9e81cb1d294cb83b767bf64},
}

% Conference paper. The percent sign in the abstract is escaped so that a
% style which prints abstracts does not treat the rest of the line as a comment.
@inproceedings{breck2007identifying,
  author    = {Breck, Eric and Choi, Yejin and Cardie, Claire},
  title     = {Identifying expressions of opinion in context},
  booktitle = {{IJCAI}'07: Proceedings of the 20th International Joint Conference on Artificial Intelligence},
  pages     = {2683--2688},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  location  = {Hyderabad, India},
  year      = {2007},
  url       = {http://portal.acm.org/citation.cfm?id=1625707},
  abstract  = {While traditional information extraction systems have been built to answer questions about facts, subjective information extraction systems will answer questions about feelings and opinions. A crucial step towards this goal is identifying the words and phrases that express opinions in text. Indeed, although much previous work has relied on the identification of opinion expressions for a variety of sentiment-based NLP tasks, none has focused directly on this important supporting task. Moreover, none of the proposed methods for identification of opinion expressions has been evaluated at the task that they were designed to perform. We present an approach for identifying opinion expressions that uses conditional random fields and we evaluate the approach at the expression-level using a standard sentiment corpus. Our approach achieves expression-level performance within 5\% of the human interannotator agreement.},
  interhash = {cca75dca85a5c73eb48503416d32d7e0},
  intrahash = {0526efb8c2a3ca5f5677010032a0cdfe},
}

% Workshop paper. The url is stored raw (it contains characters such as
% the ampersand and semicolon that url-aware styles handle verbatim).
@inproceedings{lewis1991evaluating,
  author    = {Lewis, David D.},
  title     = {Evaluating text categorization},
  booktitle = {Proceedings of Speech and Natural Language Workshop},
  pages     = {312--318},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo},
  month     = feb,
  year      = {1991},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/download;jsessionid=FB1ECC14412DFFF631E7C0725D4DB3CC?doi=10.1.1.56.9675&rep=rep1&type=pdf},
  abstract  = {While certain standard procedures are widely used for evaluating text retrieval systems and algorithms, the same is not true for text categorization. Omission of important data from reports is common and methods of measuring effectiveness vary widely. This has made judging the relative merits of techniques for text categorization difficult and has disguised important research issues. In this paper I discuss a variety of ways of evaluating the effectiveness of text categorization systems, drawing both on reported categorization experiments and on methods used in evaluating query-driven retrieval. I also consider the extent to which the same evaluation methods may be used with systems for text extraction, a more complex task. In evaluating either kind of system, the purpose for which the output is to be used is crucial in choosing appropriate evaluation methods.},
  interhash = {a9c64235f49e18a6b80c306b61ff40c2},
  intrahash = {2e8f19bde0a73d96d16c071b2016073f},
}

% Workshop paper. Author names are spelled out to match the other entries
% by the same authors in this file.
@inproceedings{hotho03wordnet,
  author    = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
  title     = {Wordnet improves text document clustering},
  booktitle = {Proc. {SIGIR} Semantic Web Workshop},
  address   = {Toronto},
  year      = {2003},
  url       = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003wordnet.pdf},
  comment   = {alpha},
  interhash = {c2a9a89ce20cef90a1e78d34dc2c2afe},
  intrahash = {04c7d86337d68e4ed9ae637029c43414},
}

% Technical report. The report number lives in the number field, which is
% the field standard styles print for this entry type.
@techreport{hotho03textclustering,
  author      = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
  title       = {Text Clustering Based on Background Knowledge},
  institution = {University of Karlsruhe, Institute AIFB},
  type        = {Technical Report},
  number      = {425},
  year        = {2003},
  url         = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003text.pdf},
  abstract    = {Text document clustering plays an important role in providing intuitive navigation and browsing mechanisms by organizing large amounts of information into a small number of meaningful clusters. Standard partitional or agglomerative clustering methods efficiently compute results to this end. However, the bag of words representation used for these clustering methods is often unsatisfactory as it ignores relationships between important terms that do not co-occur literally. Also, it is mostly left to the user to find out why a particular partitioning has been achieved, because it is only specified extensionally. In order to deal with the two problems, we integrate background knowledge into the process of clustering text documents. First, we preprocess the texts, enriching their representations by background knowledge provided in a core ontology --- in our application Wordnet. Then, we cluster the documents by a partitional algorithm. Our experimental evaluation on Reuters newsfeeds compares clustering results with pre-categorizations of news. In the experiments, improvements of results by background knowledge compared to the baseline can be shown for many interesting tasks. Second, the clustering partitions the large number of documents to a relatively small number of clusters, which may then be analyzed by conceptual clustering. In our approach, we applied Formal Concept Analysis. Conceptual clustering techniques are known to be too slow for directly clustering several hundreds of documents, but they give an intensional account of cluster results. They allow for a concise description of commonalities and distinctions of different clusters. With background knowledge they even find abstractions like ``food'' (vs. specializations like ``beef'' or ``corn''). Thus, in our approach, partitional clustering reduces first the size of the problem such that it becomes tractable for conceptual clustering, which then facilitates the understanding of the results.},
  comment     = {alpha},
  interhash   = {0bc7c3fc1273355f45c8970a7ea58f97},
  intrahash   = {61d58db419af0dbc3681432588219c3d},
}

% Conference poster. The conference days (19 to 22 November) are kept in
% eventdate, which classic BibTeX styles ignore and biblatex understands;
% the month field carries only the month macro.
@inproceedings{hotho03ontologies,
  author    = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
  title     = {Ontologies improve text document clustering},
  booktitle = {Proceedings of the 2003 {IEEE} International Conference on Data Mining},
  pages     = {541--544},
  publisher = {IEEE Computer Society},
  address   = {Melbourne, Florida},
  month     = nov,
  eventdate = {2003-11-19/2003-11-22},
  year      = {2003},
  note      = {Poster},
  url       = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003ontologies.pdf},
  comment   = {alpha},
  interhash = {b56c36d6d9c9ca9e6bd236a0f92415a5},
  intrahash = {57a39c81cff1982dbefed529be934bee},
}

% Conference paper in a numbered series volume.
% NOTE(review): the exported editor field was garbled (two editors fused
% into one name); the list below is the reconstruction -- verify names and
% order against the front matter of the proceedings volume.
@inproceedings{hotho03explaining,
  author    = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
  title     = {Explaining Text Clustering Results using Semantic Structures},
  booktitle = {Knowledge Discovery in Databases: {PKDD} 2003, 7th European Conference on Principles and Practice of Knowledge Discovery in Databases},
  editor    = {Lavra{\v{c}}, Nada and Gamberger, Dragan and Todorovski, Ljup{\v{c}}o and Blockeel, Hendrik},
  series    = {Lecture Notes in Artificial Intelligence},
  volume    = {2838},
  pages     = {217--228},
  publisher = {Springer},
  address   = {Heidelberg},
  year      = {2003},
  url       = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003explaining.pdf},
  abstract  = {Common text clustering techniques offer rather poor capabilities for explaining to their users why a particular result has been achieved. They have the disadvantage that they do not relate semantically nearby terms and that they cannot explain how resulting clusters are related to each other. In this paper, we discuss a way of integrating a large thesaurus and the computation of lattices of resulting clusters into common text clustering in order to overcome these two problems. As its major result, our approach achieves an explanation using an appropriate level of granularity at the concept level as well as an appropriate size and complexity of the explaining lattice of resulting clusters.},
  comment   = {alpha},
  interhash = {cf66183151a5d94a0941ac6d5089ae89},
  intrahash = {53a943b6be4b34cf4e5329d0b58e99f6},
}

% Workshop paper. Author names are spelled out to match the other entries
% by the same authors in this file; editor given names are stored as
% initials, as exported.
@inproceedings{hotho02conceptualclustering,
  author    = {Hotho, Andreas and Stumme, Gerd},
  title     = {Conceptual Clustering of Text Clusters},
  booktitle = {Proc. Fachgruppentreffen Maschinelles Lernen ({FGML} 2002)},
  editor    = {K{\'o}kai, G. and Zeidler, J.},
  pages     = {37--45},
  year      = {2002},
  url       = {http://www.kde.cs.uni-kassel.de/stumme/papers/2002/FGML02.pdf},
  comment   = {alpha},
  interhash = {3dd3d4ce38d0de0ba8e167f8133cbb3e},
  intrahash = {e253c44552a046fe90236274bcfeab13},
}

% Book (published dissertation, per the citation key and url). The German
% noun in the title is brace-protected so sentence-casing styles keep its
% capital letter.
@book{phdthesisBookAho04,
  author    = {Hotho, Andreas},
  title     = {Clustern mit {Hintergrundwissen}},
  series    = {Diski},
  volume    = {286},
  publisher = {Akademische Verlagsgesellschaft Aka GmbH},
  address   = {Berlin},
  year      = {2004},
  isbn      = {3-89838-286-9},
  url       = {http://www.kde.cs.uni-kassel.de/hotho/pub/2004/dissAho.pdf},
  interhash = {174d464d8c6c38b690ab8aa76cd3fe5f},
  intrahash = {fc7f40f7b7c8e3f72acb881b6d2d2680},
}