% Bibliography entries on scene-text recognition and CAPTCHA security.
% Text outside entries is ignored by BibTeX, so these lines act as comments.
% The interhash/intrahash fields are kept verbatim; they look like record
% hashes written by the export tool (TODO confirm) and are not fields that
% standard styles print.

% Conference paper, ICDAR 2011, pages 440--445.
@inproceedings{coates2011detection,
  abstract  = {Reading text from photographs is a challenging problem that has received
               a significant amount of attention. Two key components of most systems are
               (i) text detection from images and (ii) character recognition, and many
               recent methods have been proposed to design better feature representations
               and models for both. In this paper, we apply methods recently developed in
               machine learning -- specifically, large-scale algorithms for learning the
               features automatically from unlabeled data -- and show that they allow us
               to construct highly effective classifiers for both detection and
               recognition to be used in a high accuracy end-to-end system.},
  author    = {Coates, A. and Carpenter, B. and Case, C. and Satheesh, S. and Suresh, B. and Wang, Tao and Wu, D. J. and Ng, A. Y.},
  booktitle = {International Conference on Document Analysis and Recognition ({ICDAR})},
  doi       = {10.1109/ICDAR.2011.95},
  interhash = {adb17817e5f95605a8066737ce0e8b7e},
  intrahash = {b550ca5ec5a8b61b64b17091f7b2eeab},
  issn      = {1520-5363},
  month     = sep,
  pages     = {440--445},
  title     = {Text Detection and Character Recognition in Scene Images with Unsupervised Feature Learning},
  url       = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6065350&tag=1},
  year      = {2011},
}

% Journal article, Science 321(5895), pages 1465--1468.
% NOTE(review): eprint holds a full PDF URL rather than an archive identifier;
% it is kept verbatim here -- confirm how the target style renders it.
@article{vonahn2008recaptcha,
  abstract  = {CAPTCHAs (Completely Automated Public Turing test to tell Computers and
               Humans Apart) are widespread security measures on the World Wide Web that
               prevent automated programs from abusing online services. They do so by
               asking humans to perform a task that computers cannot yet perform, such as
               deciphering distorted characters. Our research explored whether such human
               effort can be channeled into a useful purpose: helping to digitize old
               printed material by asking users to decipher scanned words from books that
               computerized optical character recognition failed to recognize. We showed
               that this method can transcribe text with a word accuracy exceeding 99\%,
               matching the guarantee of professional human transcribers. Our apparatus
               is deployed in more than 40,000 Web sites and has transcribed over 440
               million words.},
  author    = {von Ahn, Luis and Maurer, Benjamin and McMillen, Colin and Abraham, David and Blum, Manuel},
  doi       = {10.1126/science.1160379},
  eprint    = {http://www.sciencemag.org/content/321/5895/1465.full.pdf},
  interhash = {9444cd77ddf43b6c19bf689be5b2ef34},
  intrahash = {a20d5aa858b63fcf5d2daf908fec874f},
  journal   = {Science},
  number    = {5895},
  pages     = {1465--1468},
  title     = {{reCAPTCHA}: Human-Based Character Recognition via Web Security Measures},
  url       = {http://www.sciencemag.org/content/321/5895/1465.abstract},
  volume    = {321},
  year      = {2008},
}

% Technical report, School of Computer Science, Newcastle University, May 2011.
@techreport{elahmad2011robustness,
  abstract    = {We report a novel attack on two CAPTCHAs that have been widely deployed
                 on the Internet, one being Google's home design and the other acquired
                 by Google (i.e. reCAPTCHA). With a minor change, our attack program also
                 works well on the latest ReCAPTCHA version, which uses a new defence
                 mechanism that was unknown to us when we designed our attack. This
                 suggests that our attack works in a fundamental level. Our attack
                 appears to be applicable to a whole family of text CAPTCHAs that build
                 on top of the popular segmentation-resistant mechanism of "crowding
                 character together" for security. Next, we propose a novel framework
                 that guides the application of our well-tested security engineering
                 methodology for evaluating CAPTCHA robustness, and we propose a new
                 general principle for CAPTCHA design.},
  author      = {El Ahmad, Ahmad S. and Yan, Jeff and Tayara, Mohamad},
  institution = {School of Computer Science, Newcastle University, UK},
  interhash   = {2d6bb0b3bad1f6a01c15e1bbd8bd7158},
  intrahash   = {3516bc8c24b04f63927808e82824004d},
  month       = may,
  title       = {The Robustness of {Google} {CAPTCHAs}},
  url         = {http://homepages.cs.ncl.ac.uk/jeff.yan/google.pdf},
  year        = {2011},
}