% Web bookmark records for datasets and corpora (spelling, dictionaries,
% Wikipedia, Twitter, social networks, semantic web, spam, text collections).
% Every record uses the "electronic" entry type and is keyed by the host name
% of its url. The biburl, keywords, added-at, description, interhash and
% intrahash fields are not standard bibliography fields; they are kept
% verbatim as bookkeeping data.
%
% Layout: one entry per paragraph, one field per line, fields in the order
% title, url, biburl, keywords, added-at, description, interhash, intrahash.

@electronic{web-ngram.research.microsoft.com,
  title       = {Microsoft Research - Speller Challenge Datasets},
  url         = {http://web-ngram.research.microsoft.com/spellerchallenge/DataSets.aspx},
  biburl      = {https://puma.uni-kassel.de/url/ec1f2805048b9ac374691a1774514620/benz},
  keywords    = {challenge dataset search_engine speller_challenge spelling},
  added-at    = {2011-03-16T23:23:07.000+0100},
  description = {Microsoft Research Speller Challenge},
  interhash   = {ec1f2805048b9ac374691a1774514620},
  intrahash   = {ec1f2805048b9ac374691a1774514620}
}

@electronic{www.pearsonlongman.com,
  title       = {Longman Dictionaries - Dictionaries for Research},
  url         = {http://www.pearsonlongman.com/dictionaries/research/dict-research.html},
  biburl      = {https://puma.uni-kassel.de/url/30a773117f918e4d932e0c64e5ec6a38/benz},
  keywords    = {dataset dictionary disambiguation ldoce},
  added-at    = {2011-02-18T23:23:09.000+0100},
  description = {Pearson Longman English Language Teaching (Pearson Longman ELT) is a leading educational publisher of quality resources for all ages and abilities across the curriculum, providing solutions for teachers and students.},
  interhash   = {30a773117f918e4d932e0c64e5ec6a38},
  intrahash   = {30a773117f918e4d932e0c64e5ec6a38}
}

@electronic{www.yr-bcn.es,
  title       = {Semantically Annotated Snapshot of the English Wikipedia (SW v.1)},
  url         = {http://www.yr-bcn.es/semanticWikipedia},
  biburl      = {https://puma.uni-kassel.de/url/a703bcf3b3890f8a4a61f052228bae8f/benz},
  keywords    = {semantics dataset wikipedia annotated ontology},
  added-at    = {2011-02-04T16:08:40.000+0100},
  description = {},
  interhash   = {a703bcf3b3890f8a4a61f052228bae8f},
  intrahash   = {a703bcf3b3890f8a4a61f052228bae8f}
}

@electronic{twapperkeeper.com,
  title       = {Twapper Keeper - Archive Tweets},
  url         = {http://twapperkeeper.com/},
  biburl      = {https://puma.uni-kassel.de/url/273e6fdd06c89ce7a09eeb2b1598ee09/benz},
  keywords    = {dataset twapper twapper_keeper twitter},
  added-at    = {2011-02-04T16:07:48.000+0100},
  description = {Allows you to archive and organize your tweets based upon hash tags.},
  interhash   = {273e6fdd06c89ce7a09eeb2b1598ee09},
  intrahash   = {273e6fdd06c89ce7a09eeb2b1598ee09}
}

@electronic{www.mail-archive.com,
  title       = {[twitter-dev] Re: Tweet Corpus creation for NLP research},
  url         = {http://www.mail-archive.com/twitter-development-talk@googlegroups.com/msg05715.html},
  biburl      = {https://puma.uni-kassel.de/url/1b9716d1f2674e18b3016d3ac6edc16a/benz},
  keywords    = {dataset twitter},
  added-at    = {2011-02-04T16:07:48.000+0100},
  description = {},
  interhash   = {1b9716d1f2674e18b3016d3ac6edc16a},
  intrahash   = {1b9716d1f2674e18b3016d3ac6edc16a}
}

@electronic{www.eml-research.de,
  title       = {Download Wikipedia Category Taxonomy},
  url         = {http://www.eml-research.de/english/research/nlp/download/wikitaxonomy.php},
  biburl      = {https://puma.uni-kassel.de/url/9d33bae08b8defaa95cda39608876bf6/benz},
  keywords    = {categories category_hierarchy dataset download hierarchy ontology taxonomy wikipedia},
  added-at    = {2011-02-04T16:07:33.000+0100},
  description = {},
  interhash   = {9d33bae08b8defaa95cda39608876bf6},
  intrahash   = {9d33bae08b8defaa95cda39608876bf6}
}

@electronic{socialnetworks.mpi-sws.org,
  title       = {Online Social Networks Research @MPI-SWS},
  url         = {http://socialnetworks.mpi-sws.org/},
  biburl      = {https://puma.uni-kassel.de/url/bd1d323860ba4faafd18250421c1dc94/benz},
  keywords    = {dataset download misvlove social_network},
  added-at    = {2011-02-04T16:07:28.000+0100},
  description = {},
  interhash   = {bd1d323860ba4faafd18250421c1dc94},
  intrahash   = {bd1d323860ba4faafd18250421c1dc94}
}

@electronic{www.p2p.tu-darmstadt.de,
  title       = {Research},
  url         = {http://www.p2p.tu-darmstadt.de/research/},
  biburl      = {https://puma.uni-kassel.de/url/f518aee1c775b508e6145577ba08264a/benz},
  keywords    = {dataset social_networks socialnetwork},
  added-at    = {2011-02-04T16:07:27.000+0100},
  description = {},
  interhash   = {f518aee1c775b508e6145577ba08264a},
  intrahash   = {f518aee1c775b508e6145577ba08264a}
}

@electronic{evanjones.ca,
  title       = {Extracting Text from Wikipedia},
  url         = {http://evanjones.ca/software/wikipedia2text.html},
  biburl      = {https://puma.uni-kassel.de/url/7ee155b9d5b03240e99556cbf828f202/benz},
  keywords    = {data dataset plain_text python text tool wiki wikipedia},
  added-at    = {2011-02-04T16:07:25.000+0100},
  description = {},
  interhash   = {7ee155b9d5b03240e99556cbf828f202},
  intrahash   = {7ee155b9d5b03240e99556cbf828f202}
}

@electronic{infochimps.org,
  title       = {Twitter data sets for download - Infochimps},
  url         = {http://infochimps.org/tags/twitter},
  biburl      = {https://puma.uni-kassel.de/url/6d90faa71befc28ab285391e88270b86/benz},
  keywords    = {dataset download twitter},
  added-at    = {2011-02-04T16:07:23.000+0100},
  description = {},
  interhash   = {6d90faa71befc28ab285391e88270b86},
  intrahash   = {6d90faa71befc28ab285391e88270b86}
}

% NOTE: this record originally reused the key "infochimps.org", which is
% already taken by the record directly above (a different url and hash).
% Citation keys must be unique, so this later record carries a distinct key;
% the earlier record keeps the original key, so existing citations of
% "infochimps.org" keep pointing at the same record as before.
@electronic{infochimps.org-home,
  title       = {Infochimps Data Marketplace / Commons: Download Sell or Share Databases, statistics, data sets for free},
  url         = {http://infochimps.org/},
  biburl      = {https://puma.uni-kassel.de/url/f09284bceef641abc13023fb04fe5ddc/benz},
  keywords    = {data dataset datasets download search},
  added-at    = {2011-02-04T16:07:23.000+0100},
  description = {Find and download data in any format, from financial to social networking to GIS data. Or sell data in our data marketplace, at a price you set. We have large data sets, spreadsheets, and databases packed with statistics.},
  interhash   = {f09284bceef641abc13023fb04fe5ddc},
  intrahash   = {f09284bceef641abc13023fb04fe5ddc}
}

@electronic{an.kaist.ac.kr,
  title       = {What is Twitter, a Social Network or a News Media? - WWW'10},
  url         = {http://an.kaist.ac.kr/traces/WWW2010.html},
  biburl      = {https://puma.uni-kassel.de/url/39fbd40f2a9a8d4682162507b2a41924/benz},
  keywords    = {dataset twitter www www2010},
  added-at    = {2011-02-04T16:07:23.000+0100},
  description = {},
  interhash   = {39fbd40f2a9a8d4682162507b2a41924},
  intrahash   = {39fbd40f2a9a8d4682162507b2a41924}
}

@electronic{www.angela-bohn.de,
  title       = {Social Network Data},
  url         = {http://www.angela-bohn.de/data.html},
  biburl      = {https://puma.uni-kassel.de/url/f12e6a09d67816c3b8cdfcc628488675/benz},
  keywords    = {data dataset sna social_network},
  added-at    = {2011-02-04T16:07:16.000+0100},
  description = {},
  interhash   = {f12e6a09d67816c3b8cdfcc628488675},
  intrahash   = {f12e6a09d67816c3b8cdfcc628488675}
}

@electronic{km.aifb.kit.edu,
  title       = {Billion Triple Challenge 2010 Dataset},
  url         = {http://km.aifb.kit.edu/projects/btc-2010/},
  biburl      = {https://puma.uni-kassel.de/url/0bd6afefa4fa9b8deb385d518ea6d32f/benz},
  keywords    = {billion_triple data dataset semantic semantic_web},
  added-at    = {2011-02-04T16:07:16.000+0100},
  description = {},
  interhash   = {0bd6afefa4fa9b8deb385d518ea6d32f},
  intrahash   = {0bd6afefa4fa9b8deb385d518ea6d32f}
}

@electronic{plg.uwaterloo.ca,
  title       = {Spam dataset},
  url         = {http://plg.uwaterloo.ca/~gvcormac/treccorpus07/},
  biburl      = {https://puma.uni-kassel.de/url/501ec074d8507358fca772f3107e51d2/benz},
  keywords    = {dataset spam},
  added-at    = {2011-02-04T16:07:08.000+0100},
  description = {},
  interhash   = {501ec074d8507358fca772f3107e51d2},
  intrahash   = {501ec074d8507358fca772f3107e51d2}
}

@electronic{blog.stackoverflow.com,
  title       = {Stack Overflow Creative Commons Data Dump - Blog – Stack Overflow},
  url         = {http://blog.stackoverflow.com/2009/06/stack-overflow-creative-commons-data-dump/},
  biburl      = {https://puma.uni-kassel.de/url/a7fa4f7d006797bc78d35471f0a06c51/benz},
  keywords    = {data dataset stackoverflow},
  added-at    = {2011-02-04T16:06:58.000+0100},
  description = {},
  interhash   = {a7fa4f7d006797bc78d35471f0a06c51},
  intrahash   = {a7fa4f7d006797bc78d35471f0a06c51}
}

@electronic{boston.lti.cs.cmu.edu,
  title       = {The ClueWeb09 Dataset},
  url         = {http://boston.lti.cs.cmu.edu/Data/clueweb09/},
  biburl      = {https://puma.uni-kassel.de/url/e72d177444d58aa6b449daecdc8fa659/benz},
  keywords    = {clueweb dataset research web},
  added-at    = {2011-02-04T16:06:58.000+0100},
  description = {},
  interhash   = {e72d177444d58aa6b449daecdc8fa659},
  intrahash   = {e72d177444d58aa6b449daecdc8fa659}
}

@electronic{lshtc.iit.demokritos.gr,
  title       = {Call for Participation | Second Pascal Challenge on Large Scale Hierarchical Text classification},
  url         = {http://lshtc.iit.demokritos.gr/},
  biburl      = {https://puma.uni-kassel.de/url/473be765bc5416cd619de9e7ac5e40bf/benz},
  keywords    = {2011 challenge dataset dmoz text_classification wikipedia workshop},
  added-at    = {2011-02-04T16:06:38.000+0100},
  description = {Following a successful first edition, we are pleased to announce the 2nd edition of the Large Scale Hierarchical Text Classification (LSHTC) Pascal Challenge. The LSHTC Challenge is a hierarchical text classification competition, using large datasets. This year’s challenge will increase the scale and the difficulty of the task, using data from Wikipedia (www.wikipedia.org), in addition to the ODP Web directory data (www.dmoz.org).},
  interhash   = {473be765bc5416cd619de9e7ac5e40bf},
  intrahash   = {473be765bc5416cd619de9e7ac5e40bf}
}

@electronic{scientext.msh-alpes.fr,
  title       = {Summary - Scientext},
  url         = {http://scientext.msh-alpes.fr/scientext-site-en/spip.php?article1},
  biburl      = {https://puma.uni-kassel.de/url/a903396473ececf187b3bd655b0f5270/benz},
  keywords    = {dataset english french science scientext texts},
  added-at    = {2011-02-04T16:06:37.000+0100},
  description = {Scientext is a new, on-line French and English corpus of scientific texts. The corpus includes 4.8 million running tokens in French, 13 million words of research articles in English (medicine and biology), and an English-language sub-corpus of French undergraduate students’ texts (1,1 million words). The corpus is organized to facilitate the linguistic study of authorial position and reasoning in scientific articles through phraseology and lexico-grammatical markers linked to causality.},
  interhash   = {a903396473ececf187b3bd655b0f5270},
  intrahash   = {a903396473ececf187b3bd655b0f5270}
}

@electronic{i.stanford.edu,
  title       = {Mining of Massive Datasets},
  url         = {http://i.stanford.edu/~ullman/mmds.html},
  biburl      = {https://puma.uni-kassel.de/url/c126718938b617b07b2c5344a34a2816/benz},
  keywords    = {data data_mining dataset massive},
  added-at    = {2011-02-04T16:06:37.000+0100},
  description = {},
  interhash   = {c126718938b617b07b2c5344a34a2816},
  intrahash   = {c126718938b617b07b2c5344a34a2816}
}