% Bibliography on natural language processing, text mining and ontology learning.
%
% Conventions used in this file:
%   * one field per line, brace-delimited values, fields in alphabetical order;
%   * non-ASCII letters are written as BibTeX special characters, e.g. {\"u};
%   * page ranges use a double hyphen, months use the standard three-letter macros;
%   * interhash/intrahash and similar bookkeeping fields are kept verbatim
%     (they look like identifiers from the exporting tool; styles ignore them).

@proceedings{cellier2014proceedings,
  bibsource = {dblp computer science bibliography, http://dblp.org},
  editor = {Cellier, Peggy and Charnois, Thierry and Hotho, Andreas and Matwin, Stan and Moens, Marie{-}Francine and Toussaint, Yannick},
  interhash = {212d282598a034c37510c1c08c4f3a34},
  intrahash = {cfb7265080d484cfda32e1fbdaff361f},
  publisher = {CEUR-WS.org},
  series = {{CEUR} Workshop Proceedings},
  title = {Proceedings of the 1st International Workshop on Interactions between Data Mining and Natural Language Processing co-located with The European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, DMNLP@PKDD/ECML 2014, Nancy, France, September 15, 2014},
  url = {http://ceur-ws.org/Vol-1202},
  volume = {1202},
  year = {2014}
}

@inproceedings{zesch2007analysis,
  abstract = {In this paper, we discuss two graphs in Wikipedia (i) the article graph, and (ii) the category graph. We perform a graph-theoretic analysis of the category graph, and show that it is a scale-free, small world graph like other well-known lexical semantic networks. We substantiate our findings by transferring semantic relatedness algorithms defined on WordNet to the Wikipedia category graph. To assess the usefulness of the category graph as an NLP resource, we analyze its coverage and the performance of the transferred semantic relatedness algorithms.},
  address = {Rochester},
  author = {Zesch, Torsten and Gurevych, Iryna},
  booktitle = {Proceedings of the TextGraphs-2 Workshop (NAACL-HLT)},
  interhash = {0401e62edb9bfa85dd498cb40301c0cb},
  intrahash = {332ed720a72bf069275f93485432314b},
  month = apr,
  pages = {1--8},
  publisher = {Association for Computational Linguistics},
  title = {Analysis of the {Wikipedia} Category Graph for {NLP} Applications},
  url = {http://acl.ldc.upenn.edu/W/W07/W07-02.pdf#page=11},
  year = {2007}
}

@incollection{ABP:11,
  author = {Atzmueller, Martin and Beer, Stephanie and Puppe, Frank},
  booktitle = {Collaboration and the Semantic Web: Social Networks, Knowledge Networks, and Knowledge Resources},
  editor = {Br{\"u}ggemann, Stefan and d'Amato, Claudia},
  interhash = {9c0d3f10e985d8654d8a2eae39121ef2},
  intrahash = {781410de8780f9033aae08162cbdf073},
  pages = {149--167},
  publisher = {IGI Global},
  title = {Data Mining, Validation and Collaborative Knowledge Capture},
  year = {2012}
}

@incollection{Atzmueller:11,
  author = {Atzmueller, Martin},
  booktitle = {Applied Natural Language Processing and Content Analysis: Advances in Identification, Investigation and Resolution},
  editor = {McCarthy, Philip M. and Boonthum, Chutima},
  interhash = {b7d700872f84e545b714b9cc59b0c188},
  intrahash = {c930dbfed60e5a6d20e8785181f42feb},
  publisher = {IGI Global},
  title = {Data Mining},
  year = {2011}
}

% NOTE: a second entry with the key ABP:11 used to follow here. It described the
% same chapter as the ABP:11 entry above, but with year 2011 and no pages
% (interhash 83cf9bd4264c938fa454c381a69e9880,
%  intrahash 63df2eaae8a6df990b0a56379531a242).
% BibTeX rejects a repeated key and keeps only the first occurrence, so the
% shadowed copy was removed; the entry that was actually in effect is unchanged.

@book{manning1999foundations,
  address = {Cambridge, MA},
  author = {Manning, Christopher D. and Sch{\"u}tze, Hinrich},
  interhash = {a81df02f92f266a51183fe936f588a08},
  intrahash = {e2f05fae5d02f579a85a10b79edf1d99},
  publisher = {MIT Press},
  title = {Foundations of Statistical Natural Language Processing},
  year = {1999}
}

@article{journals/corr/abs-1103-0398,
  archiveprefix = {arXiv},
  author = {Collobert, Ronan and Weston, Jason and Bottou, L{\'e}on and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel P.},
  ee = {http://arxiv.org/abs/1103.0398},
  eprint = {1103.0398},
  interhash = {c1e968fc1903e842ab3c638cd5ffca61},
  intrahash = {24c6f6531a70625136167307bc15a480},
  journal = {CoRR},
  note = {informal publication},
  title = {Natural Language Processing (almost) from Scratch},
  url = {http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/de//pubs/archive/35671.pdf},
  volume = {abs/1103.0398},
  year = {2011}
}

@article{spence1990lexical,
  abstract = {The 1-million-word Brown corpus was searched for co-occurrences of semantically related pairs of concrete nouns appearing within an arbitrary window of 250 characters. Related pairs of nouns (OCEAN-WATER) co-occur significantly more often than matched, unrelated pairs (OCEAN-HAND), and this difference remained significant within blocks of text up to 1000 characters in length. Frequency of co-occurrence, corrected for chance, is significantly correlated with association strength. Lexical distance between co-occurring members of a given pair is inversely correlated with association strength. Significantly more co-occurrences were found, per unit text, in the fictional sections of the corpus.},
  affiliation = {Department of Psychiatry Robert Wood Johnson Medical School 08854 Piscataway New Jersey},
  author = {Spence, Donald P. and Owens, Kimberly C.},
  doi = {10.1007/BF01074363},
  interhash = {75d5913fcca51ebf5fd7e281c36a69e9},
  intrahash = {3e0a291d4193c824616c6f73ad0a4101},
  issn = {0090-6905},
  journal = {Journal of Psycholinguistic Research},
  keyword = {Behavioral Science},
  number = {5},
  pages = {317--330},
  publisher = {Springer Netherlands},
  title = {Lexical Co-occurrence and Association Strength},
  url = {http://dx.doi.org/10.1007/BF01074363},
  volume = {19},
  year = {1990}
}

@inproceedings{liu2006web,
  author = {Liu, Vinci and Curran, James R.},
  booktitle = {EACL},
  crossref = {conf/eacl/2006},
  ee = {http://acl.ldc.upenn.edu/E/E06/E06-1030.pdf},
  interhash = {f966023df4185e781314340265d3df1f},
  intrahash = {934a38bd8d7696cff1da3a2df3724407},
  isbn = {1-932432-59-0},
  publisher = {Association for Computational Linguistics},
  title = {Web Text Corpus for Natural Language Processing},
  url = {http://dblp.uni-trier.de/db/conf/eacl/eacl2006.html#LiuC06},
  year = {2006}
}

% The title below stays fully brace-protected on purpose: it is German, and
% sentence-casing styles would otherwise lowercase its nouns.
@book{helbig2008wissensverarbeitung,
  abstract = {Das Buch gibt eine umfassende Darstellung einer Methodik zur Interpretation und Bedeutungsrepr{\"a}sentation nat{\"u}rlichsprachlicher Ausdr{\"u}cke. Diese Methodik der Mehrschichtigen Erweiterten Semantischen Netze (MultiNet) ist sowohl f{\"u}r theoretische Untersuchungen als auch f{\"u}r die automatische Verarbeitung nat{\"u}rlicher Sprache auf dem Rechner geeignet. Die vorgestellten Ergebnisse sind eingebettet in ein System von Software-Werkzeugen, die eine praktische Nutzung der MultiNet-Darstellungsmittel als Formalismus zur Bedeutungsrepr{\"a}sentation sichern. Hierzu geh{\"o}ren: eine Werkbank f{\"u}r den Wissensingenieur, ein {\"U}bersetzungssystem zur automatischen Gewinnung von Bedeutungsdarstellungen nat{\"u}rlichsprachlicher S{\"a}tze und eine Werkbank f{\"u}r den Computerlexikographen.},
  address = {Berlin},
  author = {Helbig, Hermann},
  doi = {10.1007/978-3-540-76278-2},
  edition = {2.},
  file = {Amazon Search inside:http\://www.amazon.de/gp/reader/3540762760/:URL},
  interhash = {f7c09fb5257be21200f6a9622c5d301c},
  intrahash = {6eff05ef4aa01e934aa45df7a7ad3154},
  isbn = {3-540-76276-0},
  publisher = {Springer},
  title = {{Wissensverarbeitung und die Semantik der nat{\"u}rlichen Sprache: Wissensrepr{\"a}sentation mit MultiNet}},
  year = {2008}
}

@inproceedings{jarmasz2003rogets,
  abstract = {We have implemented a system that measures semantic similarity using a computerized 1987 Roget's Thesaurus, and evaluated it by performing a few typical tests. We compare the results of these tests with those produced by WordNet-based similarity measures. One of the benchmarks is Miller and Charles' list of 30 noun pairs to which human judges had assigned similarity measures. We correlate these measures with those computed by several NLP systems. The 30 pairs can be traced back to Rubenstein and Goodenough's 65 pairs, which we have also studied. Our Roget's-based system gets correlations of .878 for the smaller and .818 for the larger list of noun pairs; this is quite close to the .885 that Resnik obtained when he employed humans to replicate the Miller and Charles experiment. We further evaluate our measure by using Roget's and WordNet to answer 80 TOEFL, 50 ESL and 300 Reader's Digest questions: the correct synonym must be selected amongst a group of four words. Our system gets 78.75\%, 82.00\% and 74.33\% of the questions respectively.},
  author = {Jarmasz, Mario and Szpakowicz, Stan},
  booktitle = {Conference on Recent Advances in Natural Language Processing},
  interhash = {e28cc3a4231e064f44cfdb2e3338aaf3},
  intrahash = {acde39a427ef0e7501f07e8b067a88f0},
  pages = {212--219},
  title = {{Roget's} Thesaurus and Semantic Similarity},
  url = {http://www.site.uottawa.ca/~mjarmasz/pubs/jarmasz_roget_sim.pdf},
  year = {2003}
}

% jurafsky2000speech and 0130950696 (further below) describe the same book;
% both keys are kept because either one may be cited.
@book{jurafsky2000speech,
  asin = {0130950696},
  author = {Jurafsky, Daniel and Martin, James H.},
  dewey = {410.285},
  ean = {9780130950697},
  edition = {1},
  interhash = {ae1205b1f526d068fc9364510bf99418},
  intrahash = {25110e6691b5ee9dbe97216ce087487f},
  isbn = {0130950696},
  note = {neue Auflage kommt im Fr{\"u}hjahr 2008},
  publisher = {Prentice Hall},
  series = {Prentice Hall Series in Artificial Intelligence},
  title = {Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics and Speech Recognition},
  url = {http://www.amazon.com/gp/redirect.html%3FASIN=0130950696%26tag=ws%26lcode=xm2%26cID=2025%26ccmID=165953%26location=/o/ASIN/0130950696%253FSubscriptionId=13CT5CVB80YFWJEPWS02},
  year = {2000}
}

@inproceedings{cimiano2003automaticb,
  author = {Cimiano, Philipp and Staab, Steffen and Tane, Julien},
  booktitle = {Proceedings of the ECML/PKDD Workshop on Adaptive Text Extraction and Mining, Cavtat-Dubrovnik, Croatia},
  interhash = {2f9df79fa0d890faa91dc1d0d0def735},
  intrahash = {c62b4e1dc65490d68bef7eaed01f83ea},
  lastdatemodified = {2007-03-22},
  lastname = {Cimiano},
  own = {notown},
  pages = {10--17},
  pdf = {cimiano03-automatic.pdf},
  read = {notread},
  title = {Automatic Acquisition of Taxonomies from Text: {FCA} meets {NLP}},
  url = {http://www.aifb.uni-karlsruhe.de/WBS/pci/ontolearning.pdf},
  year = {2003}
}

% Conference paper (SIGIR 1998): it carries a booktitle and no journal, so it
% is typed as inproceedings rather than article.
@inproceedings{ponte1998lma,
  address = {New York, NY, USA},
  author = {Ponte, Jay M. and Croft, W. Bruce},
  booktitle = {Proceedings of the 21st Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
  interhash = {7d5d602886fa34e485cf6194f70bd793},
  intrahash = {229b65aa2b99b2f27bc990840e79b3eb},
  pages = {275--281},
  publisher = {ACM},
  title = {A Language Modeling Approach to Information Retrieval},
  year = {1998}
}

@inproceedings{5273871,
  abstract = {This paper introduces WikiOnto: a system that assists in the extraction and modeling of topic ontologies in a semi-automatic manner using a preprocessed document corpus derived from Wikipedia. Based on the Wikipedia XML Corpus, we present a three-tiered framework for extracting topic ontologies in quick time and a modeling environment to refine these ontologies. Using natural language processing (NLP) and other machine learning (ML) techniques along with a very rich document corpus, this system proposes a solution to a task that is generally considered extremely cumbersome. The initial results of the prototype suggest strong potential of the system to become highly successful in ontology extraction and modeling and also inspire further research on extracting ontologies from other semi-structured document corpora as well.},
  author = {De Silva, L. and Jayaratne, L.},
  booktitle = {Second International Conference on the Applications of Digital Information and Web Technologies (ICADIWT '09)},
  doi = {10.1109/ICADIWT.2009.5273871},
  interhash = {c1996cb9e69de56e2bb2f8e763fe0482},
  intrahash = {66bec053541e521fbe68c0119806ae49},
  month = aug,
  pages = {446--451},
  title = {Semi-automatic Extraction and Modeling of Ontologies Using {Wikipedia} {XML} Corpus},
  url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?isnumber=5273826&arnumber=5273871&count=156&index=116},
  year = {2009}
}

@inproceedings{breck2007identifying,
  abstract = {While traditional information extraction systems have been built to answer questions about facts, subjective information extraction systems will answer questions about feelings and opinions. A crucial step towards this goal is identifying the words and phrases that express opinions in text. Indeed, although much previous work has relied on the identification of opinion expressions for a variety of sentiment-based NLP tasks, none has focused directly on this important supporting task. Moreover, none of the proposed methods for identification of opinion expressions has been evaluated at the task that they were designed to perform. We present an approach for identifying opinion expressions that uses conditional random fields and we evaluate the approach at the expression-level using a standard sentiment corpus. Our approach achieves expression-level performance within 5\% of the human interannotator agreement.},
  address = {San Francisco, CA, USA},
  author = {Breck, Eric and Choi, Yejin and Cardie, Claire},
  booktitle = {IJCAI'07: Proceedings of the 20th International Joint Conference on Artificial Intelligence},
  interhash = {cca75dca85a5c73eb48503416d32d7e0},
  intrahash = {0526efb8c2a3ca5f5677010032a0cdfe},
  location = {Hyderabad, India},
  pages = {2683--2688},
  publisher = {Morgan Kaufmann Publishers Inc.},
  title = {Identifying Expressions of Opinion in Context},
  url = {http://portal.acm.org/citation.cfm?id=1625707},
  year = {2007}
}

@book{0130950696,
  asin = {0130950696},
  author = {Jurafsky, Daniel and Martin, James H.},
  dewey = {410.285},
  ean = {9780130950697},
  edition = {1},
  interhash = {ae1205b1f526d068fc9364510bf99418},
  intrahash = {25110e6691b5ee9dbe97216ce087487f},
  isbn = {0130950696},
  publisher = {Prentice Hall},
  series = {Prentice Hall Series in Artificial Intelligence},
  title = {Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics and Speech Recognition},
  url = {http://www.amazon.com/Speech-Language-Processing-Introduction-Computational/dp/0130950696%3FSubscriptionId%3D13CT5CVB80YFWJEPWS02%26tag%3Dws%26linkCode%3Dxm2%26camp%3D2025%26creative%3D165953%26creativeASIN%3D0130950696},
  year = {2000}
}

@inproceedings{fleischman03,
  abstract = {We examine the problems with automated recommendation systems when information about user preferences is limited. We equate the problem to one of content similarity measurement and apply techniques from Natural Language Processing to the domain of movie recommendation. We describe two algorithms, a na{\"i}ve word-space approach and a more sophisticated approach using topic signatures, and evaluate their performance compared to baseline, gold standard, and commercial systems.},
  address = {New York, NY, USA},
  author = {Fleischman, Michael and Hovy, Eduard},
  booktitle = {IUI '03: Proceedings of the 8th International Conference on Intelligent User Interfaces},
  doi = {10.1145/604045.604087},
  interhash = {e03b7aa4d0ff1bc505e59e0baf87074d},
  intrahash = {2435153caffd1b0e9c75a78dbeafd62b},
  isbn = {1-58113-586-6},
  location = {Miami, Florida, USA},
  pages = {242--244},
  publisher = {ACM},
  title = {Recommendations without User Preferences: A Natural Language Processing Approach},
  url = {http://portal.acm.org/citation.cfm?id=604087},
  year = {2003}
}

@inproceedings{nldb05,
  address = {Alicante, Spain},
  author = {Cimiano, Philipp and V{\"o}lker, Johanna},
  booktitle = {Proceedings of the 10th International Conference on Applications of Natural Language to Information Systems (NLDB)},
  editor = {Montoyo, Andr{\'e}s and Mu{\~n}oz, Rafael and M{\'e}tais, Elisabeth},
  interhash = {c90cb094c9f4f3cca1214d0478ffeb07},
  intrahash = {072436e5adc4f5fdc39f4baeaa55b077},
  month = jun,
  pages = {227--238},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  title = {{Text2Onto} -- A Framework for Ontology Learning and Data-driven Change Discovery},
  url = {http://www.aifb.uni-karlsruhe.de/WBS/jvo/publications/Text2Onto_nldb_2005.pdf},
  volume = {3513},
  year = {2005}
}

@article{charniak97statistical,
  author = {Charniak, Eugene},
  interhash = {9f0c334b655cfa509f9862a0569cd375},
  intrahash = {1d02e8f9d663f5cd8203ec6685a958ed},
  journal = {AI Magazine},
  number = {4},
  pages = {33--44},
  title = {Statistical Techniques for Natural Language Parsing},
  url = {http://citeseer.ist.psu.edu/article/charniak97statistical.html},
  volume = {18},
  year = {1997}
}