% Journal article: Resnik and Smith (2003), Computational Linguistics 29(3).
% The proper noun "Web" in the title is brace-protected so that styles which
% apply sentence casing do not lowercase it.
@article{resnik2003parallel,
  author     = {Resnik, Philip and Smith, Noah A.},
  title      = {The {Web} as a parallel corpus},
  journal    = {Computational Linguistics},
  volume     = {29},
  number     = {3},
  pages      = {349--380},
  month      = sep,
  year       = {2003},
  publisher  = {MIT Press},
  address    = {Cambridge, MA, USA},
  issn       = {0891-2017},
  doi        = {10.1162/089120103322711578},
  url        = {http://dx.doi.org/10.1162/089120103322711578},
  acmid      = {964753},
  issue_date = {September 2003},
  numpages   = {32},
  interhash  = {b23f5b4586fb7dd07c28c376b08c0eda},
  intrahash  = {2fdc5044a0d669f6766edaaceaae2bc3},
  abstract   = {Parallel corpora have become an essential resource for work in multilingual natural language processing. In this article, we report on our work using the STRAND system for mining parallel text on the World Wide Web, first reviewing the original algorithm and results and then presenting a set of significant enhancements. These enhancements include the use of supervised learning based on structural features of documents to improve classification performance, a new content-based measure of translational equivalence, and adaptation of the system to take advantage of the Internet Archive for mining parallel text from the Web on a large scale. Finally, the value of these techniques is demonstrated in the construction of a significant parallel corpus for a low-density language pair.},
}

% Preprint record of Resnik (1995). The eprint identifier is the one embedded
% in the url field. This entry has the same title and interhash as the
% Resnik95 entry, so the two probably describe the same work; both keys are
% kept so that existing citations of either key still resolve.
@misc{resnik1995using,
  author           = {Resnik, Philip},
  title            = {Using Information Content to Evaluate Semantic Similarity in a Taxonomy},
  year             = {1995},
  eprint           = {cmp-lg/9511007},
  archiveprefix    = {arXiv},
  url              = {http://www.citebase.org/abstract?id=oai:arXiv.org:cmp-lg/9511007},
  file             = {resnik1995using.pdf:resnik1995using.pdf:PDF},
  pdf              = {resnik95-using.pdf},
  interhash        = {746146003bcba4f1df57044178a1b9ac},
  intrahash        = {454781d9c6deadeae45d0eba0d0cdf91},
  lastdatemodified = {2006-09-25},
  lastname         = {Resnik},
  own              = {notown},
  read             = {notread},
  abstract         = {This paper presents a new measure of semantic similarity in an IS-A taxonomy, based on the notion of information content.
    Experimental evaluation suggests that the measure performs encouragingly well (a correlation of r = 0.79 with a benchmark set of human similarity judgments, with an upper bound of r = 0.90 for human subjects performing the same task), and significantly better than the traditional edge counting approach (r = 0.66).},
}

% Conference version of Resnik (1995).
% NOTE(review): the booktitle was cut off after "Artificial" and has been
% completed with "Intelligence". The ordinal "XI" is left as found, but the
% 1995 IJCAI meeting is usually cited as the 14th; confirm against the
% proceedings before relying on it.
@inproceedings{Resnik95,
  author    = {Resnik, Philip},
  title     = {Using Information Content to Evaluate Semantic Similarity in a Taxonomy},
  booktitle = {Proceedings of the XI International Joint Conferences on Artificial Intelligence},
  pages     = {448--453},
  year      = {1995},
  interhash = {746146003bcba4f1df57044178a1b9ac},
  intrahash = {8edbd78ec00b2649fd6b6b5a85fc03ff},
}