Coates, A.; Lee, H. & Ng, A.: An analysis of single-layer networks in unsupervised feature learning. In: Gordon, G.; Dunson, D. & Dudík, M. (Hrsg.): Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics. JMLR W&CP, 2011 (JMLR Workshop and Conference Proceedings 15), S. 215-223
[Volltext]
A great deal of research has focused on algorithms for learning features from unlabeled data. Indeed, much progress has been made on benchmark datasets like NORB and CIFAR-10 by employing increasingly complex unsupervised learning algorithms and deep models. In this paper, however, we show that several simple factors, such as the number of hidden nodes in the model, may be more important to achieving high performance than the learning algorithm or the depth of the model. Specifically, we will apply several off-the-shelf feature learning algorithms (sparse auto-encoders, sparse RBMs, K-means clustering, and Gaussian mixtures) to CIFAR-10, NORB, and STL datasets using only single-layer networks. We then present a detailed analysis of the effect of changes in the model setup: the receptive field size, number of hidden nodes (features), the step-size ("stride") between extracted features, and the effect of whitening. Our results show that large numbers of hidden nodes and dense feature extraction are critical to achieving high performance - so critical, in fact, that when these parameters are pushed to their limits, we achieve state-of-the-art performance on both CIFAR-10 and NORB using only a single layer of features. More surprisingly, our best performance is based on K-means clustering, which is extremely fast, has no hyper-parameters to tune beyond the model structure itself, and is very easy to implement. Despite the simplicity of our system, we achieve accuracy beyond all previously published results on the CIFAR-10 and NORB datasets (79.6% and 97.2% respectively).
@inproceedings{coates2011analysis,
author = {Coates, Adam and Lee, Honglak and Ng, Andrew Y.},
title = {An analysis of single-layer networks in unsupervised feature learning},
editor = {Gordon, Geoffrey and Dunson, David and Dud{\'i}k, Miroslav},
booktitle = {Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics},
series = {JMLR Workshop and Conference Proceedings},
publisher = {JMLR W\&CP},
year = {2011},
volume = {15},
pages = {215--223},
url = {http://jmlr.csail.mit.edu/proceedings/papers/v15/coates11a.html},
keywords = {feature, learning, machine, ml, unsupervised},
abstract = {A great deal of research has focused on algorithms for learning features from unlabeled data. Indeed, much progress has been made on benchmark datasets like NORB and CIFAR-10 by employing increasingly complex unsupervised learning algorithms and deep models. In this paper, however, we show that several simple factors, such as the number of hidden nodes in the model, may be more important to achieving high performance than the learning algorithm or the depth of the model. Specifically, we will apply several off-the-shelf feature learning algorithms (sparse auto-encoders, sparse RBMs, K-means clustering, and Gaussian mixtures) to CIFAR-10, NORB, and STL datasets using only single-layer networks. We then present a detailed analysis of the effect of changes in the model setup: the receptive field size, number of hidden nodes (features), the step-size ("stride") between extracted features, and the effect of whitening. Our results show that large numbers of hidden nodes and dense feature extraction are critical to achieving high performance - so critical, in fact, that when these parameters are pushed to their limits, we achieve state-of-the-art performance on both CIFAR-10 and NORB using only a single layer of features. More surprisingly, our best performance is based on K-means clustering, which is extremely fast, has no hyper-parameters to tune beyond the model structure itself, and is very easy to implement. Despite the simplicity of our system, we achieve accuracy beyond all previously published results on the CIFAR-10 and NORB datasets (79.6% and 97.2% respectively).}
}
Coates, A.; Carpenter, B.; Case, C.; Satheesh, S.; Suresh, B.; Wang, T.; Wu, D. & Ng, A.: Text Detection and Character Recognition in Scene Images with Unsupervised Feature Learning. International Conference on Document Analysis and Recognition (ICDAR). 2011, S. 440-445
[Volltext]
Reading text from photographs is a challenging problem that has received a significant amount of attention. Two key components of most systems are (i) text detection from images and (ii) character recognition, and many recent methods have been proposed to design better feature representations and models for both. In this paper, we apply methods recently developed in machine learning - specifically, large-scale algorithms for learning the features automatically from unlabeled data - and show that they allow us to construct highly effective classifiers for both detection and recognition to be used in a high accuracy end-to-end system.
@inproceedings{coates2011detection,
author = {Coates, Adam and Carpenter, Blake and Case, Carl and Satheesh, Sanjeev and Suresh, Bipin and Wang, Tao and Wu, David J. and Ng, Andrew Y.},
title = {Text Detection and Character Recognition in Scene Images with Unsupervised Feature Learning},
booktitle = {International Conference on Document Analysis and Recognition (ICDAR)},
year = {2011},
pages = {440--445},
doi = {10.1109/ICDAR.2011.95},
url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6065350&tag=1},
keywords = {feature, learning, machine, ml, ocr},
abstract = {Reading text from photographs is a challenging problem that has received a significant amount of attention. Two key components of most systems are (i) text detection from images and (ii) character recognition, and many recent methods have been proposed to design better feature representations and models for both. In this paper, we apply methods recently developed in machine learning -- specifically, large-scale algorithms for learning the features automatically from unlabeled data -- and show that they allow us to construct highly effective classifiers for both detection and recognition to be used in a high accuracy end-to-end system.}
}
Rubens, N.; Kaplan, D. & Sugiyama, M.: Active Learning in Recommender Systems. In: Ricci, F.; Rokach, L.; Shapira, B. & Kantor, P. B. (Hrsg.): Recommender Systems Handbook. Springer US, 2011, S. 735-767
[Volltext]
@incollection{rubens2011active,
  author    = {Rubens, Neil and Kaplan, Dain and Sugiyama, Masashi},
  title     = {Active Learning in Recommender Systems},
  editor    = {Ricci, Francesco and Rokach, Lior and Shapira, Bracha and Kantor, Paul B.},
  booktitle = {Recommender Systems Handbook},
  publisher = {Springer US},
  year      = {2011},
  pages     = {735--767},
  doi       = {10.1007/978-0-387-85820-3_23},
  url       = {http://dx.doi.org/10.1007/978-0-387-85820-3_23},
  isbn      = {978-0-387-85819-7},
  keywords  = {active, learning, machine, recommender},
}
Scholz, C.; Doerfel, S.; Atzmueller, M.; Hotho, A. & Stumme, G.: Resource-Aware On-Line RFID Localization Using Proximity Data. 2011
@inproceedings{DBLP:conf/pkdd/ADHSS11,
author = {Scholz, Christoph and Doerfel, Stephan and Atzmueller, Martin and Hotho, Andreas and Stumme, Gerd},
title = {Resource-Aware On-Line RFID Localization Using Proximity Data},
booktitle = {Machine Learning and Knowledge Discovery in Databases (ECML PKDD)},
year = {2011},
keywords = {2011, data, itegpub, learning, localization, machine, mining, myown, rfid},
internal-note = {booktitle was missing (required for inproceedings); value inferred from the DBLP key conf/pkdd -- verify against the DBLP record, and add publisher/pages if available}
}
Mitchell, T. M.: Machine learning. New York, NY [u.a.]: McGraw-Hill, 2010
[Volltext]
@book{mitchell2010machine,
author = {Mitchell, Tom M.},
title = {Machine learning},
publisher = {McGraw-Hill},
address = {New York, NY},
year = {2010},
url = {http://www.amazon.com/Machine-Learning-Tom-M-Mitchell/dp/0070428077},
isbn = {978-0-07-115467-3},
keywords = {Mitchell, book, info2.0, learning, machine},
internal-note = {address was truncated ('New York, NY [u.a.'); isbn field held two ISBNs (0071154671 / 9780071154673) -- kept the ISBN-13. NOTE(review): first edition of this book is 1997; 2010 is presumably a reprint -- verify}
}
Raykar, V. C.; Yu, S.; Zhao, L. H.; Valadez, G. H.; Florin, C.; Bogoni, L. & Moy, L.: Learning From Crowds. In: Journal of Machine Learning Research 11 (2010), S. 1297-1322
[Volltext]
For many supervised learning tasks it may be infeasible (or very expensive) to obtain objective and reliable labels. Instead, we can collect subjective (possibly noisy) labels from multiple experts or annotators. In practice, there is a substantial amount of disagreement among the annotators, and hence it is of great practical interest to address conventional supervised learning problems in this scenario. In this paper we describe a probabilistic approach for supervised learning when we have multiple annotators providing (possibly noisy) labels but no absolute gold standard. The proposed algorithm evaluates the different experts and also gives an estimate of the actual hidden labels. Experimental results indicate that the proposed method is superior to the commonly used majority voting baseline.
@article{raykar2010learning,
  author    = {Raykar, Vikas C. and Yu, Shipeng and Zhao, Linda H. and Valadez, Gerardo Hermosillo and Florin, Charles and Bogoni, Luca and Moy, Linda},
  title     = {Learning From Crowds},
  journal   = {Journal of Machine Learning Research},
  publisher = {JMLR.org},
  year      = {2010},
  volume    = {11},
  pages     = {1297--1322},
  url       = {http://dl.acm.org/citation.cfm?id=1756006.1859894},
  keywords  = {cirg, collective, computing, crowdsourcing, extraction, human, ie, information, intelligence, learning, machine, ml, social},
  abstract  = {For many supervised learning tasks it may be infeasible (or very expensive) to obtain objective and reliable labels. Instead, we can collect subjective (possibly noisy) labels from multiple experts or annotators. In practice, there is a substantial amount of disagreement among the annotators, and hence it is of great practical interest to address conventional supervised learning problems in this scenario. In this paper we describe a probabilistic approach for supervised learning when we have multiple annotators providing (possibly noisy) labels but no absolute gold standard. The proposed algorithm evaluates the different experts and also gives an estimate of the actual hidden labels. Experimental results indicate that the proposed method is superior to the commonly used majority voting baseline.},
}
Gerland, P.; Schulte, H. & Kroll, A.: Probability-based global state detection of complex technical systems and application to mobile working machines. European Control Conference (ECC). Budapest, Hungary: IEEE, 2009, S. 1269-1274
Frequently, the increasing level of automation requires a systematic consideration of numerous interacting components influenced by internal feedback mechanisms as well as interactions with human operators under varying environmental conditions. This places demands on modeling, which in general cannot be satisfied by traditional modeling concepts. In this paper, a model approach for complex technical systems is developed, which consists of two language layers. The first layer takes the physical knowledge of the technical system into account. The second one uses stochastic models in combination with superordinate higher-level terms to formulate properties concerning the overall system. By means of a case study dealing with the automation of mobile working machines, it is shown that this approach can be used to recognize driving situations under varying environmental conditions.
@inproceedings{Gerland2009ECCa,
author = {Gerland, Patrick and Schulte, Horst and Kroll, Andreas},
title = {Probability-based global state detection of complex technical systems and application to mobile working machines},
booktitle = {European Control Conference (ECC)},
publisher = {IEEE},
address = {Budapest, Hungary},
year = {2009},
pages = {1269--1274},
isbn = {978-963-311-369-1},
keywords = {automotive, complex systems, machine learning},
abstract = {Frequently, the increasing level of automation requires a systematic consideration of numerous interacting components influenced by internal feedback mechanisms as well as interactions with human operators under varying environmental conditions. This places demands on modeling, which in general cannot be satisfied by traditional modeling concepts. In this paper, a model approach for complex technical systems is developed, which consists of two language layers. The first layer takes the physical knowledge of the technical system into account. The second one uses stochastic models in combination with superordinate higher-level terms to formulate properties concerning the overall system. By means of a case study dealing with the automation of mobile working machines, it is shown that this approach can be used to recognize driving situations under varying environmental conditions.},
internal-note = {this entry was a corrupted duplicate of Gerland2009ECC (first character of each wrapped line was lost); text reconstructed from the clean duplicate and key suffixed with 'a' to avoid the BibTeX repeated-entry error -- consider deleting this entry entirely}
}
Gerland, P.; Schulte, H. & Kroll, A.: Probability-based global state detection of complex technical systems and application to mobile working machines. European Control Conference (ECC). Budapest, Hungary: IEEE, 2009, S. 1269 - 1274
Frequently, the increasing level of automation requires a systematic consideration of numerous interacting components influenced by internal feedback mechanisms as well as interactions with human operators under varying environmental conditions. This places demands on modeling, which in general cannot be satisfied by traditional modeling concepts. In this paper, a model approach for complex technical systems is developed, which consists of two language layers. The first layer takes the physical knowledge of the technical system into account. The second one uses stochastic models in combination with superordinate higher-level terms to formulate properties concerning the overall system. By means of a case study dealing with the automation of mobile working machines, it is shown that this approach can be used to recognize driving situations under varying environmental conditions.
@inproceedings{Gerland2009ECC,
author = {Gerland, Patrick and Schulte, Horst and Kroll, Andreas},
title = {Probability-based global state detection of complex technical systems and application to mobile working machines},
booktitle = {European Control Conference (ECC)},
publisher = {IEEE},
address = {Budapest, Hungary},
year = {2009},
pages = {1269--1274},
isbn = {978-963-311-369-1},
keywords = {automotive, complex systems, machine learning},
abstract = {Frequently, the increasing level of automation requires a systematic consideration of numerous interacting components influenced by internal feedback mechanisms as well as interactions with human operators under varying environmental conditions. This places demands on modeling, which in general cannot be satisfied by traditional modeling concepts. In this paper, a model approach for complex technical systems is developed, which consists of two language layers. The first layer takes the physical knowledge of the technical system into account. The second one uses stochastic models in combination with superordinate higher-level terms to formulate properties concerning the overall system. By means of a case study dealing with the automation of mobile working machines, it is shown that this approach can be used to recognize driving situations under varying environmental conditions.}
}
Illig, J.: Machine Learnability Analysis of Textclassifications in a Social Bookmarking Folksonomy. Kassel, University of Kassel, Bachelor Thesis, 2008
@mastersthesis{illig2008machine,
  author   = {Illig, Jens},
  title    = {Machine Learnability Analysis of Textclassifications in a Social Bookmarking Folksonomy},
  school   = {University of Kassel},
  address  = {Kassel},
  type     = {Bachelor Thesis},
  year     = {2008},
  keywords = {classification, learning, machine, recommendations, recommender, tags},
}
Illig, J.: Machine Learnability Analysis of Textclassifications in a Social Bookmarking Folksonomy. Kassel, University of Kassel, Bachelor Thesis, 2008
@mastersthesis{illig2008machinea,
author = {Illig, Jens},
title = {Machine Learnability Analysis of Textclassifications in a Social Bookmarking Folksonomy},
school = {University of Kassel},
address = {Kassel},
type = {Bachelor Thesis},
year = {2008},
keywords = {Illig, bachelor, classification, learning, machine, recommendations, text},
internal-note = {duplicate of illig2008machine (differs only in keywords); key suffixed with 'a' to avoid the BibTeX repeated-entry error -- consider merging the keyword lists and deleting this entry}
}
Getoor, L. & Taskar, B. (Hrsg.): Introduction to Statistical Relational Learning (Adaptive Computation and Machine Learning). 2007
@book{srl2007,
title = {Introduction to Statistical Relational Learning},
editor = {Getoor, Lise and Taskar, Ben},
series = {Adaptive Computation and Machine Learning},
publisher = {The MIT Press},
year = {2007},
keywords = {2007, KI2007WebMining, learning, machine, stat, statistical},
internal-note = {fixed syntax error (doubled comma after key) and garbled editor field '{Getoor, Ben Taskar Lise}' -- the editors are Lise Getoor and Ben Taskar; series name moved out of title; publisher added (required for book) -- verify}
}
Tsoumakas, G. & Katakis, I.: Multi Label Classification: An Overview. In: International Journal of Data Warehouse and Mining 3 (2007), Nr. 3, S. 1-13
Nowadays, multi-label classification methods are increasingly required by modern applications, such as protein function classification, music categorization and semantic scene classification. This paper introduces the task of multi-label classification, organizes the sparse related literature into a structured presentation and performs comparative experimental results of certain multi-label classification methods. It also contributes the definition of concepts for the quantification of the multi-label nature of a data set.
@article{citeulike:2146554,
author = {Tsoumakas, Grigorios and Katakis, Ioannis},
title = {Multi Label Classification: An Overview},
editor = {Taniar, David},
journal = {International Journal of Data Warehouse and Mining},
publisher = {Idea Group Publishing},
year = {2007},
volume = {3},
number = {3},
pages = {1--13},
keywords = {classification, learning, machine, ml, multi_label, survey},
abstract = {Nowadays, multi-label classification methods are increasingly required by modern applications, such as protein function classification, music categorization and semantic scene classification. This paper introduces the task of multi-label classification, organizes the sparse related literature into a structured presentation and performs comparative experimental results of certain multi-label classification methods. It also contributes the definition of concepts for the quantification of the multi-label nature of a data set.},
internal-note = {NOTE(review): journal name is presumably 'International Journal of Data Warehousing and Mining' and the title is usually hyphenated ('Multi-Label') -- verify against the publisher's record}
}
Ciravegna, F. & Chapman, S.: Mining the Semantic Web: Requirements for machine learning. Proceedings of the Dagstuhl Seminar in Machine Learning for the Semantic Web. 2005
[Volltext]
@inproceedings{Chapman05b,
  author    = {Ciravegna, Fabio and Chapman, Sam},
  title     = {Mining the Semantic Web: Requirements for machine learning},
  booktitle = {Proceedings of the Dagstuhl Seminar in Machine Learning for the Semantic Web},
  year      = {2005},
  url       = {http://www.smi.ucd.ie/Dagstuhl-MLSW/proceedings/ciravegna-chapman.pdf},
  keywords  = {web, learning, ontology, knowledge, population, integration, semantic, machine, acquisition},
}
McCallum, A. K.: MALLET: A Machine Learning for Language Toolkit. , 2002
[Volltext]
@unpublished{McCallumMALLET,
  author   = {McCallum, Andrew Kachites},
  title    = {MALLET: A Machine Learning for Language Toolkit},
  year     = {2002},
  url      = {http://mallet.cs.umass.edu},
  note     = {http://mallet.cs.umass.edu},
  keywords = {extraction, learning, information, mallet, machine},
}
Thrun, S.; Burgard, W. & Fox, D.: Probabilistic Robotics (Intelligent Robotics and Autonomous Agents). 2001
[Volltext]
@book{thrun2001,
author = {Thrun, Sebastian and Burgard, Wolfram and Fox, Dieter},
title = {Probabilistic Robotics},
series = {Intelligent Robotics and Autonomous Agents},
publisher = {The MIT Press},
year = {2001},
url = {http://www.amazon.com/Probabilistic-Robotics-Intelligent-Autonomous-Agents/dp/0262201623/ref=sr_11_1/105-3361811-4085215?ie=UTF8&qid=1190743235&sr=11-1},
keywords = {dm, learning, machine, ml, probabilistic},
internal-note = {series name moved out of title; publisher added (required for book). NOTE(review): the book is usually dated 2005 -- verify the year}
}
Leake, D. B.: Case-based reasoning. In: Ralston, A.; Reilly, E. D. & Hemmendinger, D. (Hrsg.): Encyclopedia of Computer Science. 4th. Aufl. Chichester, UK: John Wiley and Sons Ltd., 2000, S. 196-197
[Volltext]
Case-based reasoning (CBR) is an artificial intelligence paradigm for reasoning and learning. Case-based reasoning solves new problems by retrieving stored records of prior problem-solving episodes (cases) and adapting their solutions to fit new circumstances. Each processing episode provides a new case that is stored for future reuse, making learning a natural side-effect of the reasoning process. Case-based reasoning is also studied within cognitive science as a model of human reasoning: studies show that people use recollections of prior problems to guide their reasoning in a wide range of tasks, such as programming, mathematical problem solving, diagnosis, decision making, and design.
@incollection{leake2000casebased,
author = {Leake, David B.},
title = {Case-based reasoning},
editor = {Ralston, Anthony and Reilly, Edwin D. and Hemmendinger, David},
booktitle = {Encyclopedia of Computer Science},
publisher = {John Wiley and Sons Ltd.},
address = {Chichester, UK},
year = {2000},
pages = {196--197},
edition = {Fourth},
url = {http://dl.acm.org/citation.cfm?id=1074100.1074199},
isbn = {0-470-86412-5},
keywords = {based, case, cbr, learning, machine, ml, reasoning},
abstract = {Case-based reasoning (CBR) is an artificial intelligence paradigm for reasoning and learning. Case-based reasoning solves new problems by retrieving stored records of prior problem-solving episodes (cases) and adapting their solutions to fit new circumstances. Each processing episode provides a new case that is stored for future reuse, making learning a natural side-effect of the reasoning process. Case-based reasoning is also studied within cognitive science as a model of human reasoning: studies show that people use recollections of prior problems to guide their reasoning in a wide range of tasks, such as programming, mathematical problem solving, diagnosis, decision making, and design.}
}
Newman, D. J.; Hettich, S.; Blake, C. L. & Merz, C. J.: UCI Repository of machine learning databases. , 1998
[Volltext]
@misc{Newman+Hettich+Blake+Merz:1998,
author = {Newman, D. J. and Hettich, S. and Blake, C. L. and Merz, C. J.},
title = {{UCI} Repository of machine learning databases},
year = {1998},
url = {http://www.ics.uci.edu/~mlearn/MLRepository.html},
keywords = {learning, data, dataset, dm, mining, machine, ml, uci},
internal-note = {author field was garbled ('Newman, C.L. Blake D.J.'); restored the standard four-author UCI citation matching the key. URL had '$mlearn' for '~mlearn' -- verify the link still resolves}
}
Cohen, W. W. & Hirsh, H.: Learning the Classic Description Logic: Theoretical and Experimental Results. KR. 1994, S. 121-133
[Volltext]
@inproceedings{conf/kr/CohenH94,
author = {Cohen, William W. and Hirsh, Haym},
title = {Learning the {Classic} Description Logic: Theoretical and Experimental Results},
booktitle = {KR},
year = {1994},
pages = {121--133},
url = {http://www.cs.cmu.edu/~wcohen/postscript/kr-94.ps},
isbn = {1-55860-554-1},
keywords = {learning, logic, description, machine, ml, dl}
}
Cohen, W. W. & Hirsh, H.: The Learnability of Description Logics with Equality Constraints. In: Machine Learning 17 (1994), Nr. 2-3, S. 169-199
[Volltext]
@article{journals/ml/CohenH94,
author = {Cohen, William W. and Hirsh, Haym},
title = {The Learnability of Description Logics with Equality Constraints},
journal = {Machine Learning},
year = {1994},
volume = {17},
number = {2-3},
pages = {169--199},
url = {http://www.cs.cmu.edu/~wcohen/postscript/kr-94.ps},
keywords = {learning, logic, description, machine, ml, dl},
internal-note = {NOTE(review): url is identical to that of the KR-94 conference paper (conf/kr/CohenH94) and presumably points at the wrong document -- verify}
}
Sowa, J. F.: Conceptual Structures: Information Processing in Mind and Machine. Reading, MA: Addison-Wesley Publishing Company, 1984
@book{sowa84,
author = {Sowa, John F.},
title = {Conceptual Structures: Information Processing in Mind and Machine},
publisher = {Addison-Wesley Publishing Company},
address = {Reading, MA},
year = {1984},
keywords = {processing, information, structures, machine, mind, conceptual}
}