% Bibliography entries on text clustering, ontology learning and
% Formal Concept Analysis (FCA).
%
% Conventions used below:
%   * one field per line, page ranges written with an en-dash (--);
%   * accented letters written as BibTeX special characters, e.g. {\'o};
%   * interhash / intrahash are kept verbatim from the original export
%     (presumably BibSonomy record hashes -- confirm before removing);
%     standard styles ignore them, as they ignore the other non-standard
%     fields (comment, ee, eventdate, internal-note).
% Citation keys are unchanged so existing cite commands keep working.

% Survey chapter in an LNCS volume. The key is a placeholder inherited
% from the export; it is kept because documents may already cite it.
@incollection{noKey,
  author    = {Poelmans, Jonas and Ignatov, Dmitry I. and Viaene, Stijn and Dedene, Guido and Kuznetsov, Sergei O.},
  title     = {Text Mining Scientific Papers: A Survey on {FCA}-Based Information Retrieval Research},
  booktitle = {Advances in Data Mining. Applications and Theoretical Aspects},
  editor    = {Perner, Petra},
  series    = {Lecture Notes in Computer Science},
  volume    = 7377,
  pages     = {273--287},
  publisher = {Springer Berlin Heidelberg},
  year      = 2012,
  isbn      = {978-3-642-31487-2},
  doi       = {10.1007/978-3-642-31488-9_22},
  url       = {http://dx.doi.org/10.1007/978-3-642-31488-9_22},
  language  = {English},
  abstract  = {Formal Concept Analysis (FCA) is an unsupervised clustering technique and many scientific papers are devoted to applying FCA in Information Retrieval (IR) research. We collected 103 papers published between 2003-2009 which mention FCA and information retrieval in the abstract, title or keywords. Using a prototype of our FCA-based toolset CORDIET, we converted the pdf-files containing the papers to plain text, indexed them with Lucene using a thesaurus containing terms related to FCA research and then created the concept lattice shown in this paper. We visualized, analyzed and explored the literature with concept lattices and discovered multiple interesting research streams in IR of which we give an extensive overview. The core contributions of this paper are the innovative application of FCA to the text mining of scientific papers and the survey of the FCA-based IR research.},
  interhash = {04d543b37049a90f2cbd796585bda214},
  intrahash = {f6eddba1f2c6b7cdbfa67a0c79ae5ae8}
}

% Poster paper. The original pages field read "541-544 (Poster" with an
% unbalanced parenthesis; the marker now lives in note. The original month
% field read "November 19-22,"; the day range is preserved in eventdate.
@inproceedings{hotho03ontologies,
  author    = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
  title     = {Ontologies improve text document clustering},
  booktitle = {Proceedings of the 2003 IEEE International Conference on Data Mining},
  address   = {Melbourne, Florida},
  pages     = {541--544},
  publisher = {IEEE Computer Society},
  month     = nov,
  year      = 2003,
  eventdate = {2003-11-19/2003-11-22},
  note      = {Poster},
  url       = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003ontologies.pdf},
  comment   = {alpha},
  interhash = {b56c36d6d9c9ca9e6bd236a0f92415a5},
  intrahash = {57a39c81cff1982dbefed529be934bee}
}

% Technical report. The report number was stored in volume, which standard
% styles do not print for this entry type; it is now in number.
@techreport{hotho03textclustering,
  author      = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
  title       = {Text Clustering Based on Background Knowledge},
  institution = {University of Karlsruhe, Institute AIFB},
  type        = {Technical Report},
  number      = 425,
  year        = 2003,
  url         = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003text.pdf},
  abstract    = {Text document clustering plays an important role in providing intuitive navigation and browsing mechanisms by organizing large amounts of information into a small number of meaningful clusters. Standard partitional or agglomerative clustering methods efficiently compute results to this end. However, the bag of words representation used for these clustering methods is often unsatisfactory as it ignores relationships between important terms that do not co-occur literally. Also, it is mostly left to the user to find out why a particular partitioning has been achieved, because it is only specified extensionally. In order to deal with the two problems, we integrate background knowledge into the process of clustering text documents. First, we preprocess the texts, enriching their representations by background knowledge provided in a core ontology --- in our application Wordnet. Then, we cluster the documents by a partitional algorithm. Our experimental evaluation on Reuters newsfeeds compares clustering results with pre-categorizations of news. In the experiments, improvements of results by background knowledge compared to the baseline can be shown for many interesting tasks. Second, the clustering partitions the large number of documents to a relatively small number of clusters, which may then be analyzed by conceptual clustering. In our approach, we applied Formal Concept Analysis. Conceptual clustering techniques are known to be too slow for directly clustering several hundreds of documents, but they give an intensional account of cluster results. They allow for a concise description of commonalities and distinctions of different clusters. With background knowledge they even find abstractions like ``food'' (vs. specializations like ``beef'' or ``corn''). Thus, in our approach, partitional clustering reduces first the size of the problem such that it becomes tractable for conceptual clustering, which then facilitates the understanding of the results.},
  comment     = {alpha},
  interhash   = {0bc7c3fc1273355f45c8970a7ea58f97},
  intrahash   = {61d58db419af0dbc3681432588219c3d}
}

% Author given names are spelled out to match the other entries by the
% same authors, so styles treat them as the same people.
@inproceedings{hotho02conceptualclustering,
  author    = {Hotho, Andreas and Stumme, Gerd},
  title     = {Conceptual Clustering of Text Clusters},
  booktitle = {Proc. Fachgruppentreffen Maschinelles Lernen (FGML 2002)},
  editor    = {K{\'o}kai, G. and Zeidler, J.},
  pages     = {37--45},
  year      = 2002,
  url       = {http://www.kde.cs.uni-kassel.de/stumme/papers/2002/FGML02.pdf},
  comment   = {alpha},
  interhash = {3dd3d4ce38d0de0ba8e167f8133cbb3e},
  intrahash = {e253c44552a046fe90236274bcfeab13}
}

@inproceedings{hotho03wordnet,
  author    = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
  title     = {Wordnet improves text document clustering},
  booktitle = {Proc. SIGIR Semantic Web Workshop},
  address   = {Toronto},
  year      = 2003,
  url       = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003wordnet.pdf},
  comment   = {alpha},
  interhash = {c2a9a89ce20cef90a1e78d34dc2c2afe},
  intrahash = {04c7d86337d68e4ed9ae637029c43414}
}

% The editor list originally ended in "Todorovski, Hendrik BlockeelLjupco",
% i.e. two names fused together; they are separated again here.
@inproceedings{hotho03explaining,
  author    = {Hotho, Andreas and Staab, Steffen and Stumme, Gerd},
  title     = {Explaining Text Clustering Results using Semantic Structures},
  booktitle = {Knowledge Discovery in Databases: PKDD 2003, 7th European Conference on Principles and Practice of Knowledge Discovery in Databases},
  editor    = {Lavra{\v{c}}, Nada and Gamberger, Dragan and Blockeel, Hendrik and Todorovski, Ljupco},
  series    = {LNAI},
  volume    = 2838,
  pages     = {217--228},
  publisher = {Springer},
  address   = {Heidelberg},
  year      = 2003,
  url       = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/hotho2003explaining.pdf},
  abstract  = {Common text clustering techniques offer rather poor capabilities for explaining to their users why a particular result has been achieved. They have the disadvantage that they do not relate semantically nearby terms and that they cannot explain how resulting clusters are related to each other. In this paper, we discuss a way of integrating a large thesaurus and the computation of lattices of resulting clusters into common text clustering in order to overcome these two problems. As its major result, our approach achieves an explanation using an appropriate level of granularity at the concept level as well as an appropriate size and complexity of the explaining lattice of resulting clusters.},
  comment   = {alpha},
  interhash = {cf66183151a5d94a0941ac6d5089ae89},
  intrahash = {53a943b6be4b34cf4e5329d0b58e99f6}
}

% The type field overrides the default thesis label with "Bachelor Thesis".
@mastersthesis{illig2008machine,
  author    = {Illig, Jens},
  title     = {Machine Learnability Analysis of Textclassifications in a Social Bookmarking Folksonomy},
  school    = {University of Kassel},
  type      = {Bachelor Thesis},
  address   = {Kassel},
  year      = 2008,
  interhash = {65c16443f45ffd46175f68d14b4f809a},
  intrahash = {9a65067da65e8301182b33b4ae292141}
}

% Originally an article entry with no journal and with the text
% "Submitted for publication" in the year field. That text is now the note
% of an unpublished entry.
@unpublished{mohammadSubmittedDistributional,
  author        = {Mohammad, Saif and Hirst, Graeme},
  title         = {Distributional measures as proxies for semantic relatedness},
  note          = {Submitted for publication},
  year          = 2005,
  url           = {http://ftp.cs.toronto.edu/pub/gh/Mohammad+Hirst-2005.pdf},
  internal-note = {REVIEW: year 2005 is inferred from the URL filename only -- confirm},
  interhash     = {69e8fa5785216419a3f39536115b814e},
  intrahash     = {fe1ed4dfc0e42165de44853564c7f6af}
}

% The export carried both year = 2005 and date = 2007-07-26. Under biblatex
% the date field would win and print the wrong year, so the value is parked
% in an ignored field.
@article{cimiano05learning,
  author        = {Cimiano, Philipp and Hotho, Andreas and Staab, Steffen},
  title         = {Learning Concept Hierarchies from Text Corpora using Formal Concept Analysis},
  journal       = {Journal of Artificial Intelligence Research},
  volume        = 24,
  pages         = {305--339},
  year          = 2005,
  url           = {http://dblp.uni-trier.de/db/journals/jair/jair24.html#CimianoHS05},
  ee            = {http://www.jair.org/papers/paper1648.html},
  internal-note = {REVIEW: original export had date = 2007-07-26, conflicting with year 2005; presumably a record timestamp -- confirm},
  interhash     = {4c09568cff62babd362aab03095f4589},
  intrahash     = {eaaf0e4b3a8b29fab23b6c15ce2d308d}
}