% Bookmark export from PUMA (puma.uni-kassel.de), users hotho and benz.
% Every entry below carries both the "dataset" and the "text" keyword and
% points at a web resource; entries are ordered newest first by added-at.
%
% Conventions used in this file:
%   - Citation keys are the host name of the bookmarked URL.
%   - url and biburl hold raw URLs (no LaTeX escaping).
%   - title is typeset by LaTeX, so LaTeX specials in it are escaped and
%     acronyms / proper nouns are wrapped in braces to survive styles that
%     apply sentence casing.
%   - keywords, added-at, description, interhash and intrahash are PUMA
%     metadata; they are kept verbatim from the export.

@electronic{www.stm-assoc.org,
  title       = {{Microsoft} {Word} - 2013\_11\_11\_Text\_and\_Data\_Mining\_Declaration.doc - 2013\_11\_11\_Text\_and\_Data\_Mining\_Declaration.pdf},
  url         = {http://www.stm-assoc.org/2013_11_11_Text_and_Data_Mining_Declaration.pdf},
  biburl      = {https://puma.uni-kassel.de/url/332b7f5b1b6633c94bef77be3ccadfa5/hotho},
  keywords    = {access data dataset journal mining paper sota text},
  added-at    = {2013-11-27T08:54:41.000+0100},
  description = {},
  interhash   = {332b7f5b1b6633c94bef77be3ccadfa5},
  intrahash   = {332b7f5b1b6633c94bef77be3ccadfa5},
}

@electronic{evanjones.ca,
  title       = {Extracting Text from {Wikipedia}},
  url         = {http://evanjones.ca/software/wikipedia2text.html},
  biburl      = {https://puma.uni-kassel.de/url/7ee155b9d5b03240e99556cbf828f202/benz},
  keywords    = {data dataset plain_text python text tool wiki wikipedia},
  added-at    = {2011-02-04T16:07:25.000+0100},
  description = {},
  interhash   = {7ee155b9d5b03240e99556cbf828f202},
  intrahash   = {7ee155b9d5b03240e99556cbf828f202},
}

@electronic{acl-arc.comp.nus.edu.sg,
  title       = {{ACL} Anthology Reference Corpus ({ACL} {ARC})},
  url         = {http://acl-arc.comp.nus.edu.sg/},
  biburl      = {https://puma.uni-kassel.de/url/02e1b1ee344b89910970681457820bca/hotho},
  keywords    = {acl dataset paper pdf text},
  added-at    = {2010-03-19T10:55:58.000+0100},
  description = {},
  interhash   = {02e1b1ee344b89910970681457820bca},
  intrahash   = {02e1b1ee344b89910970681457820bca},
}

@electronic{people.csail.mit.edu,
  title       = {Home Page for 20 {Newsgroups} Data Set},
  url         = {http://people.csail.mit.edu/jrennie/20Newsgroups/},
  biburl      = {https://puma.uni-kassel.de/url/bbb84b0f8c4f9921aaa40e1a07190279/hotho},
  keywords    = {20 dataset newsgroups text},
  added-at    = {2008-04-12T15:32:30.000+0200},
  description = {The 20 Newsgroups data set},
  interhash   = {bbb84b0f8c4f9921aaa40e1a07190279},
  intrahash   = {bbb84b0f8c4f9921aaa40e1a07190279},
}

@electronic{kdd.ics.uci.edu,
  title       = {20 {Newsgroups}},
  url         = {http://kdd.ics.uci.edu/databases/20newsgroups/20newsgroups.html},
  biburl      = {https://puma.uni-kassel.de/url/6ee032e41e462edbe7baf2db309d0370/hotho},
  keywords    = {20 dataset newsgroups text},
  added-at    = {2008-04-12T15:32:12.000+0200},
  description = {20 Newsgroups Abstract This data set consists of 20000 messages taken from 20 Usenet newsgroups. Information files: description of the data Data files: 20_newsgroups.tar.gz (17.3M; 61.6M uncompressed) mini_newsgroups.tar.gz A subset composed of 100 articles from each newsgroup. (1.9M; 6.2M uncompressed)},
  interhash   = {6ee032e41e462edbe7baf2db309d0370},
  intrahash   = {6ee032e41e462edbe7baf2db309d0370},
}

@electronic{mlkd.csd.auth.gr,
  title       = {Multilabel Classification},
  url         = {http://mlkd.csd.auth.gr/multilabel.html},
  biburl      = {https://puma.uni-kassel.de/url/ca1734ab575c5ca632a28d7b59aa4d5b/hotho},
  keywords    = {classification dataset extension multilabel text tools weka},
  added-at    = {2007-11-23T13:12:59.000+0100},
  description = {Multi-Label Classification},
  interhash   = {ca1734ab575c5ca632a28d7b59aa4d5b},
  intrahash   = {ca1734ab575c5ca632a28d7b59aa4d5b},
}

@electronic{aune.lpl.univ-aix.fr,
  title       = {{Multext}},
  url         = {http://aune.lpl.univ-aix.fr/projects/multext/},
  biburl      = {https://puma.uni-kassel.de/url/aadc8b8fde1c4cc4affcfa02f649f6be/hotho},
  keywords    = {corpus dataset text},
  added-at    = {2007-11-16T17:36:20.000+0100},
  description = {},
  interhash   = {aadc8b8fde1c4cc4affcfa02f649f6be},
  intrahash   = {aadc8b8fde1c4cc4affcfa02f649f6be},
}

@electronic{www.cs.cornell.edu,
  title       = {Datasets from transcripts of {US} {Congressional} floor debates},
  url         = {http://www.cs.cornell.edu/home/llee/data/convote.html},
  biburl      = {https://puma.uni-kassel.de/url/59a77db17bc3f77f9da8b3a7e0824a71/hotho},
  keywords    = {classification dataset text},
  added-at    = {2007-02-06T21:26:30.000+0100},
  description = {Congressional speech data},
  interhash   = {59a77db17bc3f77f9da8b3a7e0824a71},
  intrahash   = {59a77db17bc3f77f9da8b3a7e0824a71},
}

@electronic{sinai.ujaen.es,
  title       = {{HepCorpus} - {Sinai}},
  url         = {http://sinai.ujaen.es/wiki/index.php/HepCorpus#English_version},
  biburl      = {https://puma.uni-kassel.de/url/2a94f6d49e5f0d09fcd7d47ead339d62/hotho},
  keywords    = {text dataset corpus},
  added-at    = {2006-05-29T15:53:16.000+0200},
  description = {},
  interhash   = {2a94f6d49e5f0d09fcd7d47ead339d62},
  intrahash   = {2a94f6d49e5f0d09fcd7d47ead339d62},
}

% The section sign in the title below was exported as mis-decoded bytes;
% it is written as a LaTeX macro so the file stays pure ASCII.
@electronic{www.bmanuel.org,
  title       = {{Manuel} {Barbera}, Corpus based computational linguistic resources. General: E-Texts ({\S}~2.3).},
  url         = {http://www.bmanuel.org/clr2_et.html},
  biburl      = {https://puma.uni-kassel.de/url/35e334c2b9116aa25f7e10bed79d0347/hotho},
  keywords    = {text dataset corpus},
  added-at    = {2006-05-26T08:21:51.000+0200},
  description = {Electronic Literary Text Archives.},
  interhash   = {35e334c2b9116aa25f7e10bed79d0347},
  intrahash   = {35e334c2b9116aa25f7e10bed79d0347},
}

@electronic{sourceforge.net,
  title       = {{SourceForge.net}: Files},
  url         = {http://sourceforge.net/project/showfiles.php?group_id=5091&package_id=95362&release_id=399264},
  biburl      = {https://puma.uni-kassel.de/url/a81d6018be1dc1cd729c0c5e696294d9/hotho},
  keywords    = {weka text dataset},
  added-at    = {2006-03-07T08:26:04.000+0100},
  description = {New text datasets (donated by George Forman) are available for download on Sourceforge:},
  interhash   = {a81d6018be1dc1cd729c0c5e696294d9},
  intrahash   = {a81d6018be1dc1cd729c0c5e696294d9},
}