% Web references to corpora, datasets and corpus tools; every entry below is
% tagged with the keyword "corpus" (one of them spelled "Corpus").
% Each entry carries the same eight fields in the same order:
%   title, url, biburl, keywords, added-at, description, interhash, intrahash.
% Citation keys are the host name of the entry's url. Two hosts occur twice;
% the later entry for each host carries a suffix so that keys stay unique
% (a repeated key makes BibTeX drop the later entry).

@electronic{fr46.uni-saarland.de,
  title       = {UdS :: FR 4.6:  Register im Kontakt},
  url         = {http://fr46.uni-saarland.de/index.php?id=regico},
  biburl      = {https://puma.uni-kassel.de/url/02bd78b005b224e20d38eb8b5c48d359/hotho},
  keywords    = {corpus dataset hd linguistics},
  added-at    = {2014-09-29T17:49:06.000+0200},
  description = {Universität des Saarlandes,Übersetzung,Dolmetschen,Angewandte Sprachwissenschaft,UdS},
  interhash   = {02bd78b005b224e20d38eb8b5c48d359},
  intrahash   = {02bd78b005b224e20d38eb8b5c48d359}
}

@electronic{notes.jan-oliver-ruediger.de,
  title       = {CorpusExplorer | Notes - Jan Oliver Rüdiger},
  url         = {http://notes.jan-oliver-ruediger.de/corpusexplorer/},
  biburl      = {https://puma.uni-kassel.de/url/abb4a1a5e0f297f674e3c947e6fa8ac8/stumme},
  keywords    = {UniK corpus explorer kassel},
  added-at    = {2013-06-25T18:56:21.000+0200},
  description = {},
  interhash   = {abb4a1a5e0f297f674e3c947e6fa8ac8},
  intrahash   = {abb4a1a5e0f297f674e3c947e6fa8ac8}
}

@electronic{googleresearch.blogspot.de,
  title       = {50,000 Lessons on How to Read: a Relation Extraction Corpus},
  url         = {http://googleresearch.blogspot.de/2013/04/50000-lessons-on-how-to-read-relation.html},
  biburl      = {https://puma.uni-kassel.de/url/581c1631a929f1162f86e02337d50478/hotho},
  keywords    = {corpus dataset extraction relation},
  added-at    = {2013-04-17T09:57:10.000+0200},
  description = {},
  interhash   = {581c1631a929f1162f86e02337d50478},
  intrahash   = {581c1631a929f1162f86e02337d50478}
}

@electronic{trec.nist.gov,
  title       = {Tweets2011 Twitter Collection},
  url         = {http://trec.nist.gov/data/tweets/},
  biburl      = {https://puma.uni-kassel.de/url/6114f26ca8c1a62f39518e1eadc722bc/hotho},
  keywords    = {corpus dataset everyaware twitter},
  added-at    = {2011-09-02T10:41:42.000+0200},
  description = {Tweets2011 As part of the TREC 2011 microblog track, Twitter provided identifiers for approximately 16 million tweets sampled between January 23rd and February 8th, 2011.
                 The corpus is designed to be a reusable, representative sample of the twittersphere - i.e. both important and spam tweets are included.},
  interhash   = {6114f26ca8c1a62f39518e1eadc722bc},
  intrahash   = {6114f26ca8c1a62f39518e1eadc722bc}
}

@electronic{blog.afterthedeadline.com,
  title       = {Generating a Plain Text Corpus from Wikipedia « After the Deadline},
  url         = {http://blog.afterthedeadline.com/2009/12/04/generating-a-plain-text-corpus-from-wikipedia/},
  biburl      = {https://puma.uni-kassel.de/url/5045b81aa6aab7b43ab143dd279e3100/benz},
  keywords    = {corpus howto manual plaintext software tool wikipedia},
  added-at    = {2011-02-04T16:06:58.000+0100},
  description = {},
  interhash   = {5045b81aa6aab7b43ab143dd279e3100},
  intrahash   = {5045b81aa6aab7b43ab143dd279e3100}
}

@electronic{www.kdnuggets.com,
  title       = {Datasets for Data Mining, Analytics and Knowledge Discovery},
  url         = {http://www.kdnuggets.com/datasets/},
  biburl      = {https://puma.uni-kassel.de/url/737acd43d0c68d6bcbe9ec0dd2e6abf7/benz},
  keywords    = {copora corpus data datasets kdd overview},
  added-at    = {2011-02-04T16:06:58.000+0100},
  description = {Datasets for testing Data Mining, Analytics, and Knowledge Discovery algorithms},
  interhash   = {737acd43d0c68d6bcbe9ec0dd2e6abf7},
  intrahash   = {737acd43d0c68d6bcbe9ec0dd2e6abf7}
}

@electronic{ucrel.lancs.ac.uk,
  title       = {Wmatrix corpus analysis and comparison tool},
  url         = {http://ucrel.lancs.ac.uk/wmatrix/},
  biburl      = {https://puma.uni-kassel.de/url/be9637a5527dc856053e0e82932be344/benz},
  keywords    = {corpus linguistics paul_rayson wmatrix},
  added-at    = {2011-02-04T16:06:50.000+0100},
  description = {Wmatrix is a software tool for corpus analysis and comparison. It provides a web interface to the USAS and CLAWS corpus annotation tools, and standard corpus linguistic methodologies such as frequency lists and concordances. It also extends the keywords method to key grammatical categories and key semantic domains.},
  interhash   = {be9637a5527dc856053e0e82932be344},
  intrahash   = {be9637a5527dc856053e0e82932be344}
}

@electronic{www.cs.pitt.edu,
  title       = {MPQA Releases},
  url         = {http://www.cs.pitt.edu/mpqa/},
  biburl      = {https://puma.uni-kassel.de/url/f1343ddd814a4f0fb5cdb52db55afa0b/hotho},
  keywords    = {corpus dataset mpqa opinion},
  added-at    = {2010-03-17T11:31:14.000+0100},
  description = {},
  interhash   = {f1343ddd814a4f0fb5cdb52db55afa0b},
  intrahash   = {f1343ddd814a4f0fb5cdb52db55afa0b}
}

@electronic{devoted.to,
  title       = {David Lee's Bookmarks for Corpus-based Linguists},
  url         = {http://devoted.to/corpora},
  biburl      = {https://puma.uni-kassel.de/url/714a6270040c564229df5c60db472484/hotho},
  keywords    = {corpus dataset lecture nlp survey},
  added-at    = {2008-04-29T15:03:05.000+0200},
  description = {},
  interhash   = {714a6270040c564229df5c60db472484},
  intrahash   = {714a6270040c564229df5c60db472484}
}

@electronic{www.grsampson.net,
  title       = {Geoffrey Sampson: Downloadable Resources},
  url         = {http://www.grsampson.net/Resources.html},
  biburl      = {https://puma.uni-kassel.de/url/85e6aa977e160d792141038d38915682/hotho},
  keywords    = {corpus dataset lecture nlp tm},
  added-at    = {2008-04-29T12:09:45.000+0200},
  description = {},
  interhash   = {85e6aa977e160d792141038d38915682},
  intrahash   = {85e6aa977e160d792141038d38915682}
}

@electronic{www.linguistlist.org,
  title       = {Linguist List - Web Resource Listings},
  url         = {http://www.linguistlist.org/sp/Texts.html},
  biburl      = {https://puma.uni-kassel.de/url/70d16dc2e74cf3bb1f8fe4f8dbd8f17f/hotho},
  keywords    = {corpus dataset lecture nlp},
  added-at    = {2008-04-29T12:06:42.000+0200},
  description = {},
  interhash   = {70d16dc2e74cf3bb1f8fe4f8dbd8f17f},
  intrahash   = {70d16dc2e74cf3bb1f8fe4f8dbd8f17f}
}

@electronic{aune.lpl.univ-aix.fr,
  title       = {Multext},
  url         = {http://aune.lpl.univ-aix.fr/projects/multext/},
  biburl      = {https://puma.uni-kassel.de/url/aadc8b8fde1c4cc4affcfa02f649f6be/hotho},
  keywords    = {corpus dataset text},
  added-at    = {2007-11-16T17:36:20.000+0100},
  description = {},
  interhash   = {aadc8b8fde1c4cc4affcfa02f649f6be},
  intrahash   = {aadc8b8fde1c4cc4affcfa02f649f6be}
}

@electronic{plg.uwaterloo.ca,
  title       = {Trec Spam Corpus},
  url         = {http://plg.uwaterloo.ca/~gvcormac/treccorpus/},
  biburl      = {https://puma.uni-kassel.de/url/612bf65a435736bdb79b895c6b070429/hotho},
  keywords    = {trec spam set data dataset corpus},
  added-at    = {2006-09-04T15:42:51.000+0200},
  description = {},
  interhash   = {612bf65a435736bdb79b895c6b070429},
  intrahash   = {612bf65a435736bdb79b895c6b070429}
}

@electronic{sinai.ujaen.es,
  title       = {HepCorpus - Sinai},
  url         = {http://sinai.ujaen.es/wiki/index.php/HepCorpus#English_version},
  biburl      = {https://puma.uni-kassel.de/url/2a94f6d49e5f0d09fcd7d47ead339d62/hotho},
  keywords    = {text dataset corpus},
  added-at    = {2006-05-29T15:53:16.000+0200},
  description = {},
  interhash   = {2a94f6d49e5f0d09fcd7d47ead339d62},
  intrahash   = {2a94f6d49e5f0d09fcd7d47ead339d62}
}

@electronic{www.bmanuel.org,
  title       = {Manuel Barbera, Corpus based computational linguistic resources.
                 General: E-Texts (§ 2.3).},
  url         = {http://www.bmanuel.org/clr2_et.html},
  biburl      = {https://puma.uni-kassel.de/url/35e334c2b9116aa25f7e10bed79d0347/hotho},
  keywords    = {text dataset corpus},
  added-at    = {2006-05-26T08:21:51.000+0200},
  description = {Electronic Literary Text Archives.},
  interhash   = {35e334c2b9116aa25f7e10bed79d0347},
  intrahash   = {35e334c2b9116aa25f7e10bed79d0347}
}

% Key suffixed with "-reuters": the bare host name trec.nist.gov is already
% the key of the Tweets2011 entry above.
@electronic{trec.nist.gov-reuters,
  title       = {Reuters Corpora @ NIST},
  url         = {http://trec.nist.gov/data/reuters/reuters.html},
  biburl      = {https://puma.uni-kassel.de/url/d15d7cee47d8a0b288b331edbbabd8f2/hotho},
  keywords    = {text RCV1 reuters corpus},
  added-at    = {2006-05-19T16:06:18.000+0200},
  description = {},
  interhash   = {d15d7cee47d8a0b288b331edbbabd8f2},
  intrahash   = {d15d7cee47d8a0b288b331edbbabd8f2}
}

@electronic{muchmore.dfki.de,
  title       = {much.more},
  url         = {http://muchmore.dfki.de/resources_index.htm},
  biburl      = {https://puma.uni-kassel.de/url/5f2e4b0e4d51a2d01853a889f021fee3/hotho},
  keywords    = {dataset corpus},
  added-at    = {2006-04-07T10:58:58.000+0200},
  description = {A number of resources have been compiled within the context of the MuchMore project. These include: a bilingual, parallel medical corpus; corresponding queries and relevance assessments; evaluation sets of disambiguated terms for GermaNet and UMLS; an evaluation list for morphological decomposition of medical terms.},
  interhash   = {5f2e4b0e4d51a2d01853a889f021fee3},
  intrahash   = {5f2e4b0e4d51a2d01853a889f021fee3}
}

% Key suffixed with "-tools": the bare host name www.bmanuel.org is already
% the key of the E-Texts entry above.
@electronic{www.bmanuel.org-tools,
  title       = {Manuel Barbera, Corpus based computational linguistic resources. General: Tools (§ 2.5).},
  url         = {http://www.bmanuel.org/clr2_tt.html},
  biburl      = {https://puma.uni-kassel.de/url/a4403bc9f2f69b0c82556cd8c4c090ad/hotho},
  keywords    = {computational Corpus tools linguistic based},
  added-at    = {2005-06-29T10:27:53.000+0200},
  description = {},
  interhash   = {a4403bc9f2f69b0c82556cd8c4c090ad},
  intrahash   = {a4403bc9f2f69b0c82556cd8c4c090ad}
}