% AISTATS 2011 paper (JMLR W&CP volume 15): single-layer unsupervised
% feature learning evaluated on CIFAR-10, NORB and STL.
% interhash/intrahash are exporter bookkeeping fields; standard styles ignore them.
@inproceedings{coates2011analysis,
  abstract  = {A great deal of research has focused on algorithms for learning features from unlabeled data. Indeed, much progress has been made on benchmark datasets like NORB and CIFAR-10 by employing increasingly complex unsupervised learning algorithms and deep models. In this paper, however, we show that several simple factors, such as the number of hidden nodes in the model, may be more important to achieving high performance than the learning algorithm or the depth of the model. Specifically, we will apply several off-the-shelf feature learning algorithms (sparse auto-encoders, sparse RBMs, K-means clustering, and Gaussian mixtures) to CIFAR-10, NORB, and STL datasets using only single-layer networks. We then present a detailed analysis of the effect of changes in the model setup: the receptive field size, number of hidden nodes (features), the step-size (``stride'') between extracted features, and the effect of whitening. Our results show that large numbers of hidden nodes and dense feature extraction are critical to achieving high performance - so critical, in fact, that when these parameters are pushed to their limits, we achieve state-of-the-art performance on both CIFAR-10 and NORB using only a single layer of features. More surprisingly, our best performance is based on K-means clustering, which is extremely fast, has no hyper-parameters to tune beyond the model structure itself, and is very easy to implement. Despite the simplicity of our system, we achieve accuracy beyond all previously published results on the CIFAR-10 and NORB datasets (79.6\% and 97.2\% respectively).},
  author    = {Coates, A. and Lee, H. and Ng, A. Y.},
  booktitle = {Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics},
  editor    = {Gordon, Geoffrey and Dunson, David and Dud{\'\i}k, Miroslav},
  interhash = {46cfb4b5b1c16c79a966512e07f67158},
  intrahash = {bcb2c1fd335ae57362cdf348ff727589},
  pages     = {215--223},
  publisher = {JMLR W\&CP},
  series    = {JMLR Workshop and Conference Proceedings},
  title     = {An analysis of single-layer networks in unsupervised feature learning},
  url       = {http://jmlr.csail.mit.edu/proceedings/papers/v15/coates11a.html},
  volume    = {15},
  year      = {2011},
}

% ICDAR 2011 paper: scene-text detection and character recognition using
% features learned from unlabeled data. The doi field is the stable identifier;
% the url is the IEEE Xplore landing page kept from the original export.
@inproceedings{coates2011detection,
  abstract  = {Reading text from photographs is a challenging problem that has received a significant amount of attention. Two key components of most systems are (i) text detection from images and (ii) character recognition, and many recent methods have been proposed to design better feature representations and models for both. In this paper, we apply methods recently developed in machine learning -- specifically, large-scale algorithms for learning the features automatically from unlabeled data -- and show that they allow us to construct highly effective classifiers for both detection and recognition to be used in a high accuracy end-to-end system.},
  author    = {Coates, A. and Carpenter, B. and Case, C. and Satheesh, S. and Suresh, B. and Wang, Tao and Wu, D. J. and Ng, A. Y.},
  booktitle = {International Conference on Document Analysis and Recognition ({ICDAR})},
  doi       = {10.1109/ICDAR.2011.95},
  interhash = {adb17817e5f95605a8066737ce0e8b7e},
  intrahash = {b550ca5ec5a8b61b64b17091f7b2eeab},
  issn      = {1520-5363},
  month     = sep,
  pages     = {440--445},
  title     = {Text Detection and Character Recognition in Scene Images with Unsupervised Feature Learning},
  url       = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6065350&tag=1},
  year      = {2011},
}