% Cleaned bibliography.
% Conventions applied: one field per line; page ranges use "--"; names in
% "von Last, Jr, First" form separated by " and "; bare DOIs in `doi` (no
% http://dx.doi.org/ resolver prefix); non-ASCII characters written as BibTeX
% special characters ({\"o} etc.) for classic-BibTeX compatibility; month
% macros unquoted. Citation keys and BibSonomy hash fields are unchanged so
% existing \cite commands keep working.

@article{mnih2015humanlevel,
  author    = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A. and Veness, Joel and Bellemare, Marc G. and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K. and Ostrovski, Georg and Petersen, Stig and Beattie, Charles and Sadik, Amir and Antonoglou, Ioannis and King, Helen and Kumaran, Dharshan and Wierstra, Daan and Legg, Shane and Hassabis, Demis},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  month     = feb,
  year      = {2015},
  publisher = {Nature Publishing Group},
  issn      = {0028-0836},
  doi       = {10.1038/nature14236},
  interhash = {eac59980357d99db87b341b61ef6645f},
  intrahash = {fb15f4471c81dc2b9edf2304cb2f7083},
}

@article{kluegl2013exploiting,
  author    = {Kluegl, Peter and Toepfer, Martin and Lemmerich, Florian and Hotho, Andreas and Puppe, Frank},
  title     = {Exploiting Structural Consistencies with Stacked Conditional Random Fields},
  journal   = {Mathematical Methodologies in Pattern Recognition and Machine Learning, Springer Proceedings in Mathematics \& Statistics},
  volume    = {30},
  pages     = {111--125},
  year      = {2013},
  abstract  = {Conditional Random Fields (CRF) are popular methods for labeling unstructured or textual data. Like many machine learning approaches, these undirected graphical models assume the instances to be independently distributed. However, in real-world applications data is grouped in a natural way, e.g., by its creation context. The instances in each group often share additional structural consistencies. This paper proposes a domain-independent method for exploiting these consistencies by combining two CRFs in a stacked learning framework. We apply rule learning collectively on the predictions of an initial CRF for one context to acquire descriptions of its specific properties. Then, we utilize these descriptions as dynamic and high quality features in an additional (stacked) CRF. The presented approach is evaluated with a real-world dataset for the segmentation of references and achieves a significant reduction of the labeling error.},
  interhash = {9ef3f543e4cc9e2b0ef078595f92013b},
  intrahash = {fbaab25e96dd20d96ece9d7fefdc3b4f},
}

@inproceedings{ring2015condist,
  author    = {Ring, Markus and Otto, Florian and Becker, Martin and Niebler, Thomas and Landes, Dieter and Hotho, Andreas},
  title     = {{ConDist}: A Context-Driven Categorical Distance Measure},
  booktitle = {Machine Learning and Knowledge Discovery in Databases ({ECML} {PKDD} 2015)},
  year      = {2015},
  interhash = {c062a57a17a0910d6c27ecd664502ac1},
  intrahash = {a2f9d649f2856677e4d886a3b517404d},
}

@inproceedings{DBLP:conf/dsaa/KrompassNT14,
  author    = {Krompass, Denis and Nickel, Maximilian and Tresp, Volker},
  title     = {Large-scale factorization of type-constrained multi-relational data},
  booktitle = {International Conference on Data Science and Advanced Analytics, {DSAA} 2014, Shanghai, China, October 30 - November 1, 2014},
  pages     = {18--24},
  publisher = {{IEEE}},
  year      = {2014},
  doi       = {10.1109/DSAA.2014.7058046},
  isbn      = {978-1-4799-6991-3},
  crossref  = {DBLP:conf/dsaa/2014},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  interhash = {0ca986606c22ca0b3780c9b9c25f31c7},
  intrahash = {c952ed96ece470e4fa5336eedf670d5b},
}

@inproceedings{noauthororeditor,
  author    = {Mirowski, Piotr and Ranzato, Marc'Aurelio and LeCun, Yann},
  title     = {Dynamic Auto-Encoders for Semantic Indexing},
  booktitle = {Proceedings of the {NIPS} 2010 Workshop on Deep Learning},
  year      = {2010},
  url       = {http://yann.lecun.com/exdb/publis/pdf/mirowski-nipsdl-10.pdf},
  interhash = {b7ce347e904a4ca3263cf6cc1e2253bd},
  intrahash = {fc3e0e3af595f9a46df6bc9233df836f},
}

@misc{yu2013largescale,
  author        = {Yu, Hsiang-Fu and Jain, Prateek and Kar, Purushottam and Dhillon, Inderjit S.},
  title         = {Large-scale Multi-label Learning with Missing Labels},
  year          = {2013},
  eprint        = {1307.5101},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1307.5101},
  abstract      = {The multi-label classification problem has generated significant interest in recent years. However, existing approaches do not adequately address two key challenges: (a) the ability to tackle problems with a large number (say millions) of labels, and (b) the ability to handle data with missing labels. In this paper, we directly address both these problems by studying the multi-label problem in a generic empirical risk minimization (ERM) framework. Our framework, despite being simple, is surprisingly able to encompass several recent label-compression based methods which can be derived as special cases of our method. To optimize the ERM problem, we develop techniques that exploit the structure of specific loss functions - such as the squared loss function - to offer efficient algorithms. We further show that our learning framework admits formal excess risk bounds even in the presence of missing labels. Our risk bounds are tight and demonstrate better generalization performance for low-rank promoting trace-norm regularization when compared to (rank insensitive) Frobenius norm regularization. Finally, we present extensive empirical results on a variety of benchmark datasets and show that our methods perform significantly better than existing label compression based methods and can scale up to very large datasets such as the Wikipedia dataset.},
  interhash     = {1252173520757338468a68e028494647},
  intrahash     = {716e5270c1dcb3a1e4eedf9934859021},
}

@incollection{pol_introduction,
  author    = {Lehmann, Jens and Voelker, Johanna},
  title     = {An Introduction to Ontology Learning},
  booktitle = {Perspectives on Ontology Learning},
  editor    = {Lehmann, Jens and Voelker, Johanna},
  pages     = {ix--xvi},
  publisher = {AKA / IOS Press},
  year      = {2014},
  url       = {http://jens-lehmann.org/files/2014/pol_introduction.pdf},
  owner     = {jl},
  interhash = {a53a9f1796f71f2f1c5ec646961f8924},
  intrahash = {cf6a6785f5cab0525632a003c47ef5f7},
}

@inproceedings{mitchell2015,
  author    = {Mitchell, T. and Cohen, W. and Hruscha, E. and Talukdar, P. and Betteridge, J. and Carlson, A. and Dalvi, B. and Gardner, M. and Kisiel, B. and Krishnamurthy, J. and Lao, N. and Mazaitis, K. and Mohammad, T. and Nakashole, N. and Platanios, E. and Ritter, A. and Samadi, M. and Settles, B. and Wang, R. and Wijaya, D. and Gupta, A. and Chen, X. and Saparov, A. and Greaves, M. and Welling, J.},
  title     = {Never-Ending Learning},
  booktitle = {AAAI},
  year      = {2015},
  note      = {Never-Ending Learning in AAAI-2015},
  url       = {http://www.cs.cmu.edu/~wcohen/pubs.html},
  interhash = {52d0d71f6f5b332dabc1412f18e3a93d},
  intrahash = {63070703e6bb812852cca56574aed093},
}

@inproceedings{Carlson10,
  author    = {Carlson, Andrew and Betteridge, Justin and Kisiel, Bryan and Settles, Burr and Hruschka, Jr., Estevam R. and Mitchell, Tom M.},
  title     = {Toward an Architecture for Never-Ending Language Learning},
  booktitle = {Proceedings of the Conference on Artificial Intelligence (AAAI)},
  pages     = {1306--1313},
  publisher = {AAAI Press},
  year      = {2010},
  interhash = {5df31649862b1002848792cd495d46dc},
  intrahash = {f0d94ab9d299609ee92f6ecf555266d4},
}

@article{cimiano05learning,
  author    = {Cimiano, Philipp and Hotho, Andreas and Staab, Steffen},
  title     = {Learning Concept Hierarchies from Text Corpora using Formal Concept Analysis},
  journal   = {Journal of Artificial Intelligence Research},
  volume    = {24},
  pages     = {305--339},
  year      = {2005},
  url       = {http://www.jair.org/papers/paper1648.html},
  interhash = {4c09568cff62babd362aab03095f4589},
  intrahash = {eaaf0e4b3a8b29fab23b6c15ce2d308d},
}

@inproceedings{conf/pkdd/BalasubramanyanDC13,
  author    = {Balasubramanyan, Ramnath and Dalvi, Bhavana Bharat and Cohen, William W.},
  title     = {From Topic Models to Semi-supervised Learning: Biasing Mixed-Membership Models to Exploit Topic-Indicative Features in Entity Clustering},
  booktitle = {ECML/PKDD (2)},
  editor    = {Blockeel, Hendrik and Kersting, Kristian and Nijssen, Siegfried and Zelezn{\'y}, Filip},
  series    = {Lecture Notes in Computer Science},
  volume    = {8189},
  pages     = {628--642},
  publisher = {Springer},
  year      = {2013},
  doi       = {10.1007/978-3-642-40991-2_40},
  isbn      = {978-3-642-40990-5},
  url       = {http://dblp.uni-trier.de/db/conf/pkdd/pkdd2013-2.html#BalasubramanyanDC13},
  crossref  = {conf/pkdd/2013-2},
  interhash = {9a32b7cc059a500ea302d0aa65036682},
  intrahash = {e56623d21a1b7bcb442cd15fe098bb70},
}

@book{mitchell2010machine,
  author    = {Mitchell, Tom M.},
  title     = {Machine learning},
  publisher = {McGraw-Hill},
  address   = {New York, NY},
  year      = {2010},
  isbn      = {0071154671 9780071154673},
  url       = {http://www.amazon.com/Machine-Learning-Tom-M-Mitchell/dp/0070428077},
  refid     = {846511832},
  interhash = {8be657b11d4324941ba419c176c0229a},
  intrahash = {adfebd1b18f04021ba0edd69ccaa3d96},
}

@inproceedings{joachims99,
  author    = {Joachims, Thorsten},
  title     = {Making Large-Scale {SVM} Learning Practical},
  booktitle = {Advances in Kernel Methods - Support Vector Learning},
  editor    = {Sch{\"o}lkopf, Bernhard and Burges, Christopher J. C. and Smola, Alexander J.},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  year      = {1999},
  interhash = {f97179c7ebe10f64411417f9e05563a8},
  intrahash = {dc79351cca889847d9d20c7ef9dafa25},
}

@inproceedings{coates2011analysis,
  author    = {Coates, Adam and Lee, Honglak and Ng, Andrew Y.},
  title     = {An analysis of single-layer networks in unsupervised feature learning},
  booktitle = {Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics},
  editor    = {Gordon, Geoffrey and Dunson, David and Dud{\'\i}k, Miroslav},
  series    = {JMLR Workshop and Conference Proceedings},
  volume    = {15},
  pages     = {215--223},
  publisher = {JMLR W\&CP},
  year      = {2011},
  url       = {http://jmlr.csail.mit.edu/proceedings/papers/v15/coates11a.html},
  abstract  = {A great deal of research has focused on algorithms for learning features from unlabeled data. Indeed, much progress has been made on benchmark datasets like NORB and CIFAR-10 by employing increasingly complex unsupervised learning algorithms and deep models. In this paper, however, we show that several simple factors, such as the number of hidden nodes in the model, may be more important to achieving high performance than the learning algorithm or the depth of the model. Specifically, we will apply several off-the-shelf feature learning algorithms (sparse auto-encoders, sparse RBMs, K-means clustering, and Gaussian mixtures) to CIFAR-10, NORB, and STL datasets using only single-layer networks. We then present a detailed analysis of the effect of changes in the model setup: the receptive field size, number of hidden nodes (features), the step-size ("stride") between extracted features, and the effect of whitening. Our results show that large numbers of hidden nodes and dense feature extraction are critical to achieving high performance - so critical, in fact, that when these parameters are pushed to their limits, we achieve state-of-the-art performance on both CIFAR-10 and NORB using only a single layer of features. More surprisingly, our best performance is based on K-means clustering, which is extremely fast, has no hyper-parameters to tune beyond the model structure itself, and is very easy to implement. Despite the simplicity of our system, we achieve accuracy beyond all previously published results on the CIFAR-10 and NORB datasets (79.6% and 97.2% respectively).},
  interhash = {46cfb4b5b1c16c79a966512e07f67158},
  intrahash = {bcb2c1fd335ae57362cdf348ff727589},
}

@inproceedings{coates2011detection,
  author    = {Coates, Adam and Carpenter, Blake and Case, Carl and Satheesh, Sanjeev and Suresh, Bipin and Wang, Tao and Wu, David J. and Ng, Andrew Y.},
  title     = {Text Detection and Character Recognition in Scene Images with Unsupervised Feature Learning},
  booktitle = {International Conference on Document Analysis and Recognition (ICDAR)},
  pages     = {440--445},
  month     = sep,
  year      = {2011},
  doi       = {10.1109/ICDAR.2011.95},
  issn      = {1520-5363},
  url       = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6065350&tag=1},
  abstract  = {Reading text from photographs is a challenging problem that has received a significant amount of attention. Two key components of most systems are (i) text detection from images and (ii) character recognition, and many recent methods have been proposed to design better feature representations and models for both. In this paper, we apply methods recently developed in machine learning -- specifically, large-scale algorithms for learning the features automatically from unlabeled data -- and show that they allow us to construct highly effective classifiers for both detection and recognition to be used in a high accuracy end-to-end system.},
  interhash = {adb17817e5f95605a8066737ce0e8b7e},
  intrahash = {b550ca5ec5a8b61b64b17091f7b2eeab},
}

@incollection{rubens2011active,
  author    = {Rubens, Neil and Kaplan, Dain and Sugiyama, Masashi},
  title     = {Active Learning in Recommender Systems},
  booktitle = {Recommender Systems Handbook},
  editor    = {Ricci, Francesco and Rokach, Lior and Shapira, Bracha and Kantor, Paul B.},
  chapter   = {23},
  pages     = {735--767},
  publisher = {Springer US},
  year      = {2011},
  doi       = {10.1007/978-0-387-85820-3_23},
  isbn      = {978-0-387-85819-7},
  language  = {English},
  interhash = {eab8d17924be10a7999ea09e6ed3be59},
  intrahash = {e0b5682c1c228037aee63a459e2e2c62},
}

@incollection{leake2000casebased,
  author    = {Leake, David B.},
  title     = {Case-based reasoning},
  booktitle = {Encyclopedia of Computer Science},
  editor    = {Ralston, Anthony and Reilly, Edwin D. and Hemmendinger, David},
  edition   = {4th},
  pages     = {196--197},
  numpages  = {2},
  publisher = {John Wiley and Sons Ltd.},
  address   = {Chichester, UK},
  month     = jun,
  year      = {2000},
  isbn      = {0-470-86412-5},
  url       = {http://dl.acm.org/citation.cfm?id=1074100.1074199},
  acmid     = {1074199},
  abstract  = {Case-based reasoning(CBR) is an artificial intelligence paradigm for reasoning and learning. Case-based reasoning solves new problems by retrieving stored records of prior problem-solving episodes (cases) and adapting their solutions to fit new circumstances. Each processing episode provides a new case that is stored for future reuse, making learning a natural side-effect of the reasoning process. Case-based reasoning is also studied within cognitive science as a model of human reasoning: studies show that people use recollections of prior problems to guide their reasoning in a wide range of tasks, such as programming, mathematical problem solving, diagnosis, decision making, and design.},
  interhash = {fa414e2f48be14bb94cbfbf2566e36af},
  intrahash = {b8526b7c03f1fc9bdd85863dfbf881a2},
}

@inproceedings{hearst1992automatic,
  author    = {Hearst, Marti A.},
  title     = {Automatic acquisition of hyponyms from large text corpora},
  booktitle = {Proceedings of the 14th conference on Computational linguistics},
  volume    = {2},
  pages     = {539--545},
  numpages  = {7},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Nantes, France},
  year      = {1992},
  doi       = {10.3115/992133.992154},
  acmid     = {992154},
  abstract  = {We describe a method for the automatic acquisition of the hyponymy lexical relation from unrestricted text. Two goals motivate the approach: (i) avoidance of the need for pre-encoded knowledge and (ii) applicability across a wide range of text. We identify a set of lexico-syntactic patterns that are easily recognizable, that occur frequently and across text genre boundaries, and that indisputably indicate the lexical relation of interest. We describe a method for discovering these patterns and suggest that other lexical relations will also be acquirable in this way. A subset of the acquisition algorithm is implemented and the results are used to augment and critique the structure of a large hand-built thesaurus. Extensions and applications to areas such as information retrieval are suggested.},
  interhash = {8c1e90c6cc76625c34f20370a1af7ea2},
  intrahash = {2c49ad19ac6977bd806b6687e4dcc550},
}

@inproceedings{brew2010using,
  author    = {Brew, Anthony and Greene, Derek and Cunningham, P{\'a}draig},
  title     = {Using Crowdsourcing and Active Learning to Track Sentiment in Online Media},
  booktitle = {Proceedings of the 19th European Conference on Artificial Intelligence},
  editor    = {Coelho, Helder and Studer, Rudi and Wooldridge, Michael},
  series    = {Frontiers in Artificial Intelligence and Applications},
  volume    = {215},
  pages     = {145--150},
  numpages  = {6},
  publisher = {IOS Press},
  address   = {Amsterdam, The Netherlands, The Netherlands},
  year      = {2010},
  isbn      = {978-1-60750-605-8},
  url       = {http://dl.acm.org/citation.cfm?id=1860967.1860997},
  acmid     = {1860997},
  abstract  = {Tracking sentiment in the popular media has long been of interest to media analysts and pundits. With the availability of news content via online syndicated feeds, it is now possible to automate some aspects of this process. There is also great potential to crowdsource Crowdsourcing is a term, sometimes associated with Web 2.0 technologies, that describes outsourcing of tasks to a large often anonymous community. much of the annotation work that is required to train a machine learning system to perform sentiment scoring. We describe such a system for tracking economic sentiment in online media that has been deployed since August 2009. It uses annotations provided by a cohort of non-expert annotators to train a learning system to classify a large body of news items. We report on the design challenges addressed in managing the effort of the annotators and in making annotation an interesting experience.},
  interhash = {90650749ea1084b729710d37b5865b72},
  intrahash = {9643e3c5729886b0b4e85cb3d3d704f5},
}

@article{raykar2010learning,
  author     = {Raykar, Vikas C. and Yu, Shipeng and Zhao, Linda H. and Valadez, Gerardo Hermosillo and Florin, Charles and Bogoni, Luca and Moy, Linda},
  title      = {Learning From Crowds},
  journal    = {Journal of Machine Learning Research},
  volume     = {11},
  pages      = {1297--1322},
  numpages   = {26},
  publisher  = {JMLR.org},
  month      = aug,
  year       = {2010},
  issn       = {1532-4435},
  issue_date = {3/1/2010},
  url        = {http://dl.acm.org/citation.cfm?id=1756006.1859894},
  acmid      = {1859894},
  abstract   = {For many supervised learning tasks it may be infeasible (or very expensive) to obtain objective and reliable labels. Instead, we can collect subjective (possibly noisy) labels from multiple experts or annotators. In practice, there is a substantial amount of disagreement among the annotators, and hence it is of great practical interest to address conventional supervised learning problems in this scenario. In this paper we describe a probabilistic approach for supervised learning when we have multiple annotators providing (possibly noisy) labels but no absolute gold standard. The proposed algorithm evaluates the different experts and also gives an estimate of the actual hidden labels. Experimental results indicate that the proposed method is superior to the commonly used majority voting baseline.},
  interhash  = {8113daf47997fddf48e4c6c79f2eba56},
  intrahash  = {14220abe8babfab01c0cdd5ebd5e4b7c},
}