@inproceedings{nivarthi2023towards, abstract = {Anomaly detection plays a pivotal role in diverse realworld applications such as cybersecurity, fault detection, network monitoring, predictive maintenance, and highly automated driving. However, obtaining labeled anomalous data can be a formidable challenge, especially when anomalies exhibit temporal evolution. This paper introduces LATAM (Long short-term memory Autoencoder with Temporal Attention Mechanism) for few-shot anomaly detection, with the aim of enhancing detection performance in scenarios with limited labeled anomaly data. LATAM effectively captures temporal dependencies and emphasizes significant patterns in multivariate time series data. In our investigation, we comprehensively evaluate LATAM against other anomaly detection models, particularly assessing its capability in few-shot learning scenarios where we have minimal examples from the normal class and none from the anomalous class in the training data. Our experimental results, derived from real-world photovoltaic inverter data, highlight LATAM’s superiority, showcasing a substantial 27% mean F1 score improvement, even when trained on a mere two-week dataset. Furthermore, LATAM demonstrates remarkable results on the open-source SWaT dataset, achieving a 12% boost in accuracy with only two days of training data. Moreover, we introduce a simple yet effective dynamic thresholding mechanism, further enhancing the anomaly detection capabilities of LATAM. 
This underscores LATAM’s efficacy in addressing the challenges posed by limited labeled anomalies in practical scenarios and it proves valuable for downstream tasks involving temporal representation and time series prediction, extending its utility beyond anomaly detection applications.}, author = {Nivarthi, Chandana Priya and Sick, Bernhard}, booktitle = {International Conference on Machine Learning and Applications (ICMLA)}, doi = {10.1109/ICMLA58977.2023.00218}, interhash = {2c7b944a23ce00dd5e4637ce2c572f31}, intrahash = {a4a29acb67656f837ca6e532fc88958d}, pages = {1444--1450}, publisher = {IEEE}, title = {Towards Few-Shot Time Series Anomaly Detection with Temporal Attention and Dynamic Thresholding}, year = 2023 } @article{mnih2015humanlevel, author = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A. and Veness, Joel and Bellemare, Marc G. and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K. and Ostrovski, Georg and Petersen, Stig and Beattie, Charles and Sadik, Amir and Antonoglou, Ioannis and King, Helen and Kumaran, Dharshan and Wierstra, Daan and Legg, Shane and Hassabis, Demis}, interhash = {eac59980357d99db87b341b61ef6645f}, intrahash = {fb15f4471c81dc2b9edf2304cb2f7083}, issn = {00280836}, journal = {Nature}, month = feb, number = 7540, pages = {529--533}, publisher = {Nature Publishing Group, a division of Macmillan Publishers Limited. All Rights Reserved.}, title = {Human-level control through deep reinforcement learning}, url = {http://dx.doi.org/10.1038/nature14236}, volume = 518, year = 2015 } @article{kluegl2013exploiting, abstract = {Conditional Random Fields (CRF) are popular methods for labeling unstructured or textual data. Like many machine learning approaches, these undirected graphical models assume the instances to be independently distributed. However, in real-world applications data is grouped in a natural way, e.g., by its creation context. 
The instances in each group often share additional structural consistencies. This paper proposes a domain-independent method for exploiting these consistencies by combining two CRFs in a stacked learning framework. We apply rule learning collectively on the predictions of an initial CRF for one context to acquire descriptions of its specific properties. Then, we utilize these descriptions as dynamic and high quality features in an additional (stacked) CRF. The presented approach is evaluated with a real-world dataset for the segmentation of references and achieves a significant reduction of the labeling error.}, author = {Kluegl, Peter and Toepfer, Martin and Lemmerich, Florian and Hotho, Andreas and Puppe, Frank}, interhash = {9ef3f543e4cc9e2b0ef078595f92013b}, intrahash = {fbaab25e96dd20d96ece9d7fefdc3b4f}, journal = {Mathematical Methodologies in Pattern Recognition and Machine Learning Springer Proceedings in Mathematics & Statistics}, pages = {111-125}, title = {Exploiting Structural Consistencies with Stacked Conditional Random Fields}, volume = 30, year = 2013 } @inproceedings{ring2015condist, author = {Ring, Markus and Otto, Florian and Becker, Martin and Niebler, Thomas and Landes, Dieter and Hotho, Andreas}, editor = {ECMLPKDD2015}, interhash = {c062a57a17a0910d6c27ecd664502ac1}, intrahash = {a2f9d649f2856677e4d886a3b517404d}, title = {ConDist: A Context-Driven Categorical Distance Measure}, year = 2015 } @inproceedings{DBLP:conf/dsaa/KrompassNT14, author = {Krompass, Denis and Nickel, Maximilian and Tresp, Volker}, bibsource = {dblp computer science bibliography, http://dblp.org}, booktitle = {International Conference on Data Science and Advanced Analytics, {DSAA} 2014, Shanghai, China, October 30 - November 1, 2014}, crossref = {DBLP:conf/dsaa/2014}, doi = {10.1109/DSAA.2014.7058046}, interhash = {0ca986606c22ca0b3780c9b9c25f31c7}, intrahash = {c952ed96ece470e4fa5336eedf670d5b}, isbn = {978-1-4799-6991-3}, pages = {18--24}, publisher = {{IEEE}}, title = 
{Large-scale factorization of type-constrained multi-relational data}, url = {http://dx.doi.org/10.1109/DSAA.2014.7058046}, year = 2014 } @inproceedings{noauthororeditor, author = {Mirowski, Piotr and Ranzato, Marc'Aurelio and LeCun, Yann}, editor = {of the NIPS 2010 Workshop on Deep Learning, Proceedings}, interhash = {b7ce347e904a4ca3263cf6cc1e2253bd}, intrahash = {fc3e0e3af595f9a46df6bc9233df836f}, title = {Dynamic Auto-Encoders for Semantic Indexing}, url = {http://yann.lecun.com/exdb/publis/pdf/mirowski-nipsdl-10.pdf}, year = 2010 } @article{breiman2001random, abstract = {Random forests are a combination of tree predictors such that each tree depends on the values of a random vector sampled independently and with the same distribution for all trees in the forest. The generalization error for forests converges a.s. to a limit as the number of trees in the forest becomes large. The generalization error of a forest of tree classifiers depends on the strength of the individual trees in the forest and the correlation between them. Using a random selection of features to split each node yields error rates that compare favorably to }, author = {Breiman, Leo}, doi = {10.1023/A:1010933404324}, interhash = {4450d2e56555e7cb8f3817578e1dd4da}, intrahash = {b8187107bf870043f2f93669958858f1}, issn = {0885-6125}, journal = {Machine Learning}, language = {English}, number = 1, pages = {5-32}, publisher = {Kluwer Academic Publishers}, title = {Random Forests}, url = {http://dx.doi.org/10.1023/A%3A1010933404324}, volume = 45, year = 2001 } @misc{yu2013largescale, abstract = {The multi-label classification problem has generated significant interest in recent years. However, existing approaches do not adequately address two key challenges: (a) the ability to tackle problems with a large number (say millions) of labels, and (b) the ability to handle data with missing labels. 
In this paper, we directly address both these problems by studying the multi-label problem in a generic empirical risk minimization (ERM) framework. Our framework, despite being simple, is surprisingly able to encompass several recent label-compression based methods which can be derived as special cases of our method. To optimize the ERM problem, we develop techniques that exploit the structure of specific loss functions - such as the squared loss function - to offer efficient algorithms. We further show that our learning framework admits formal excess risk bounds even in the presence of missing labels. Our risk bounds are tight and demonstrate better generalization performance for low-rank promoting trace-norm regularization when compared to (rank insensitive) Frobenius norm regularization. Finally, we present extensive empirical results on a variety of benchmark datasets and show that our methods perform significantly better than existing label compression based methods and can scale up to very large datasets such as the Wikipedia dataset.}, author = {Yu, Hsiang-Fu and Jain, Prateek and Kar, Purushottam and Dhillon, Inderjit S.}, interhash = {1252173520757338468a68e028494647}, intrahash = {716e5270c1dcb3a1e4eedf9934859021}, note = {cite arxiv:1307.5101}, title = {Large-scale Multi-label Learning with Missing Labels}, url = {http://arxiv.org/abs/1307.5101}, year = 2013 } @incollection{pol_introduction, author = {Lehmann, Jens and Voelker, Johanna}, booktitle = {Perspectives on Ontology Learning}, editor = {Lehmann, Jens and Voelker, Johanna}, interhash = {a53a9f1796f71f2f1c5ec646961f8924}, intrahash = {cf6a6785f5cab0525632a003c47ef5f7}, owner = {jl}, pages = {ix-xvi}, publisher = {AKA / IOS Press}, title = {An Introduction to Ontology Learning}, url = {http://jens-lehmann.org/files/2014/pol_introduction.pdf}, year = 2014 } @inproceedings{mitchell2015, author = {Mitchell, T. and Cohen, W. and Hruscha, E. and Talukdar, P. and Betteridge, J. and Carlson, A. 
and Dalvi, B. and Gardner, M. and Kisiel, B. and Krishnamurthy, J. and Lao, N. and Mazaitis, K. and Mohammad, T. and Nakashole, N. and Platanios, E. and Ritter, A. and Samadi, M. and Settles, B. and Wang, R. and Wijaya, D. and Gupta, A. and Chen, X. and Saparov, A. and Greaves, M. and Welling, J.}, booktitle = {AAAI}, interhash = {52d0d71f6f5b332dabc1412f18e3a93d}, intrahash = {63070703e6bb812852cca56574aed093}, note = {: Never-Ending Learning in AAAI-2015}, title = {Never-Ending Learning}, url = {http://www.cs.cmu.edu/~wcohen/pubs.html}, year = 2015 } @inproceedings{Carlson10, author = {Carlson, A. and Betteridge, J. and Kisiel, B. and Settles, B. and Jr., E.R. Hruschka and Mitchell, T.M.}, booktitle = {Proceedings of the Conference on Artificial Intelligence (AAAI)}, interhash = {5df31649862b1002848792cd495d46dc}, intrahash = {f0d94ab9d299609ee92f6ecf555266d4}, pages = {1306--1313}, publisher = {AAAI Press}, title = {Toward an Architecture for Never-Ending Language Learning}, year = 2010 } @article{cimiano05learning, author = {Cimiano, Philipp and Hotho, Andreas and Staab, Steffen}, ee = {http://www.jair.org/papers/paper1648.html}, interhash = {4c09568cff62babd362aab03095f4589}, intrahash = {eaaf0e4b3a8b29fab23b6c15ce2d308d}, journal = {Journal on Artificial Intelligence Research}, pages = {305-339}, title = {Learning Concept Hierarchies from Text Corpora using Formal Concept Analysis}, url = {http://dblp.uni-trier.de/db/journals/jair/jair24.html#CimianoHS05}, volume = 24, year = 2005 } @inproceedings{conf/pkdd/BalasubramanyanDC13, author = {Balasubramanyan, Ramnath and Dalvi, Bhavana Bharat and Cohen, William W.}, booktitle = {ECML/PKDD (2)}, crossref = {conf/pkdd/2013-2}, editor = {Blockeel, Hendrik and Kersting, Kristian and Nijssen, Siegfried and Zelezný, Filip}, ee = {http://dx.doi.org/10.1007/978-3-642-40991-2_40}, interhash = {9a32b7cc059a500ea302d0aa65036682}, intrahash = {e56623d21a1b7bcb442cd15fe098bb70}, isbn = {978-3-642-40990-5}, pages = {628-642}, 
publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {From Topic Models to Semi-supervised Learning: Biasing Mixed-Membership Models to Exploit Topic-Indicative Features in Entity Clustering.}, url = {http://dblp.uni-trier.de/db/conf/pkdd/pkdd2013-2.html#BalasubramanyanDC13}, volume = 8189, year = 2013 } @book{mitchell2010machine, address = {New York, NY [u.a.}, author = {Mitchell, Tom M.}, interhash = {8be657b11d4324941ba419c176c0229a}, intrahash = {adfebd1b18f04021ba0edd69ccaa3d96}, isbn = {0071154671 9780071154673}, publisher = {McGraw-Hill}, refid = {846511832}, title = {Machine learning}, url = {http://www.amazon.com/Machine-Learning-Tom-M-Mitchell/dp/0070428077}, year = 2010 } @inproceedings{joachims99, address = {Cambridge, MA, USA}, author = {Joachims, Thorsten}, booktitle = {Advances in Kernel Methods - Support Vector Learning}, editor = {Sch\"olkopf, Bernhard and Burges, Christopher J.C. and Smola, A.}, interhash = {f97179c7ebe10f64411417f9e05563a8}, intrahash = {dc79351cca889847d9d20c7ef9dafa25}, publisher = {MIT Press}, title = {{Making Large-Scale SVM Learning Practical}}, year = 1999 } @inproceedings{ls_leimeister, address = {Helsinki, Finland (accepted for publication)}, author = {Bitzer, Philipp and Weiß, Frank and Leimeister, Jan Marco}, booktitle = {Eighth International Conference on Design Science Research in Information Systems and Technology (DESRIST)}, interhash = {48a19913ff4a7fda6f2ffac9c1b0af08}, intrahash = {ecc572acde1b82bc3db34fcfd34c4e31}, title = {Towards a Reference Model for a Productivity-optimized Delivery of Technology Mediated }, year = 2013 } @inproceedings{ls_leimeister, address = {Utrecht, Netherlands (accepted for publication)}, author = {Bitzer, Philipp and Söllner, Matthias}, booktitle = {European Conference on Information Systems (ECIS)}, interhash = {bc6ff6701f1e1673fa90aa643e1d00a6}, intrahash = {2b38076bc602a1a630b3aef8f0cc6215}, title = {Towards a Productivity Measurement Model for Technology 
Mediated Learning Services}, year = 2013 } @inproceedings{coates2011analysis, abstract = {A great deal of research has focused on algorithms for learning features from unlabeled data. Indeed, much progress has been made on benchmark datasets like NORB and CIFAR-10 by employing increasingly complex unsupervised learning algorithms and deep models. In this paper, however, we show that several simple factors, such as the number of hidden nodes in the model, may be more important to achieving high performance than the learning algorithm or the depth of the model. Specifically, we will apply several off-the-shelf feature learning algorithms (sparse auto-encoders, sparse RBMs, K-means clustering, and Gaussian mixtures) to CIFAR-10, NORB, and STL datasets using only single-layer networks. We then present a detailed analysis of the effect of changes in the model setup: the receptive field size, number of hidden nodes (features), the step-size ("stride") between extracted features, and the effect of whitening. Our results show that large numbers of hidden nodes and dense feature extraction are critical to achieving high performance - so critical, in fact, that when these parameters are pushed to their limits, we achieve state-of-the-art performance on both CIFAR-10 and NORB using only a single layer of features. More surprisingly, our best performance is based on K-means clustering, which is extremely fast, has no hyper-parameters to tune beyond the model structure itself, and is very easy to implement. Despite the simplicity of our system, we achieve accuracy beyond all previously published results on the CIFAR-10 and NORB datasets (79.6% and 97.2% respectively).}, author = {Coates, A. and Lee, H. 
and Ng, A.Y.}, booktitle = {Proceedings of the Fourteenth International Conference on Artificial Intelligence and Statistics}, editor = {Gordon, Geoffrey and Dunson, David and Dudík, Miroslav}, interhash = {46cfb4b5b1c16c79a966512e07f67158}, intrahash = {bcb2c1fd335ae57362cdf348ff727589}, pages = {215--223}, publisher = {JMLR W\&CP}, series = {JMLR Workshop and Conference Proceedings}, title = {An analysis of single-layer networks in unsupervised feature learning}, url = {http://jmlr.csail.mit.edu/proceedings/papers/v15/coates11a.html}, volume = 15, year = 2011 } @inproceedings{coates2011detection, abstract = {Reading text from photographs is a challenging problem that has received a significant amount of attention. Two key components of most systems are (i) text detection from images and (ii) character recognition, and many recent methods have been proposed to design better feature representations and models for both. In this paper, we apply methods recently developed in machine learning -- specifically, large-scale algorithms for learning the features automatically from unlabeled data -- and show that they allow us to construct highly effective classifiers for both detection and recognition to be used in a high accuracy end-to-end system.}, author = {Coates, A. and Carpenter, B. and Case, C. and Satheesh, S. and Suresh, B. and Wang, Tao and Wu, D.J. and Ng, A.Y.}, booktitle = {International Conference on Document Analysis and Recognition (ICDAR)}, doi = {10.1109/ICDAR.2011.95}, interhash = {adb17817e5f95605a8066737ce0e8b7e}, intrahash = {b550ca5ec5a8b61b64b17091f7b2eeab}, issn = {1520-5363}, month = sep, pages = {440--445}, title = {Text Detection and Character Recognition in Scene Images with Unsupervised Feature Learning}, url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6065350&tag=1}, year = 2011 } @article{ls_leimeister, author = {Wegener, R. and Leimeister, J. 
M.}, interhash = {66bccfd06244422ab0072cbef4d1e3ab}, intrahash = {1ae66b27b82ee8771b7ff6b8f9a3b8ad}, journal = {International Journal of Technology Enhanced Learning (IJTEL)}, note = {JML_390}, number = {5/6}, pages = {383 - 397}, title = {Virtual Learning Communities: Success Factors and Challenges}, volume = 4, year = 2012 } @incollection{rubens2011active, author = {Rubens, Neil and Kaplan, Dain and Sugiyama, Masashi}, booktitle = {Recommender Systems Handbook}, chapter = 23, doi = {10.1007/978-0-387-85820-3_23}, editor = {Ricci, Francesco and Rokach, Lior and Shapira, Bracha and Kantor, Paul B.}, interhash = {eab8d17924be10a7999ea09e6ed3be59}, intrahash = {e0b5682c1c228037aee63a459e2e2c62}, isbn = {978-0-387-85819-7}, language = {English}, pages = {735--767}, publisher = {Springer US}, title = {Active Learning in Recommender Systems}, url = {http://dx.doi.org/10.1007/978-0-387-85820-3_23}, year = 2011 } @incollection{leake2000casebased, abstract = {Case-based reasoning(CBR) is an artificial intelligence paradigm for reasoning and learning. Case-based reasoning solves new problems by retrieving stored records of prior problem-solving episodes (cases) and adapting their solutions to fit new circumstances. Each processing episode provides a new case that is stored for future reuse, making learning a natural side-effect of the reasoning process. Case-based reasoning is also studied within cognitive science as a model of human reasoning: studies show that people use recollections of prior problems to guide their reasoning in a wide range of tasks, such as programming, mathematical problem solving, diagnosis, decision making, and design.}, acmid = {1074199}, address = {Chichester, UK}, author = {Leake, David B.}, booktitle = {Encyclopedia of Computer Science}, edition = {4th}, editor = {Ralston, Anthony and Reilly, Edwin D. 
and Hemmendinger, David}, interhash = {fa414e2f48be14bb94cbfbf2566e36af}, intrahash = {b8526b7c03f1fc9bdd85863dfbf881a2}, isbn = {0-470-86412-5}, month = jun, numpages = {2}, pages = {196--197}, publisher = {John Wiley and Sons Ltd.}, title = {Case-based reasoning}, url = {http://dl.acm.org/citation.cfm?id=1074100.1074199}, year = 2000 } @inproceedings{hearst1992automatic, abstract = {We describe a method for the automatic acquisition of the hyponymy lexical relation from unrestricted text. Two goals motivate the approach: (i) avoidance of the need for pre-encoded knowledge and (ii) applicability across a wide range of text. We identify a set of lexico-syntactic patterns that are easily recognizable, that occur frequently and across text genre boundaries, and that indisputably indicate the lexical relation of interest. We describe a method for discovering these patterns and suggest that other lexical relations will also be acquirable in this way. A subset of the acquisition algorithm is implemented and the results are used to augment and critique the structure of a large hand-built thesaurus. Extensions and applications to areas such as information retrieval are suggested.}, acmid = {992154}, address = {Stroudsburg, PA, USA}, author = {Hearst, Marti A.}, booktitle = {Proceedings of the 14th conference on Computational linguistics}, doi = {10.3115/992133.992154}, interhash = {8c1e90c6cc76625c34f20370a1af7ea2}, intrahash = {2c49ad19ac6977bd806b6687e4dcc550}, location = {Nantes, France}, numpages = {7}, pages = {539--545}, publisher = {Association for Computational Linguistics}, title = {Automatic acquisition of hyponyms from large text corpora}, url = {http://dx.doi.org/10.3115/992133.992154}, volume = 2, year = 1992 } @inproceedings{brew2010using, abstract = {Tracking sentiment in the popular media has long been of interest to media analysts and pundits. 
With the availability of news content via online syndicated feeds, it is now possible to automate some aspects of this process. There is also great potential to crowdsource Crowdsourcing is a term, sometimes associated with Web 2.0 technologies, that describes outsourcing of tasks to a large often anonymous community. much of the annotation work that is required to train a machine learning system to perform sentiment scoring. We describe such a system for tracking economic sentiment in online media that has been deployed since August 2009. It uses annotations provided by a cohort of non-expert annotators to train a learning system to classify a large body of news items. We report on the design challenges addressed in managing the effort of the annotators and in making annotation an interesting experience.}, acmid = {1860997}, address = {Amsterdam, The Netherlands, The Netherlands}, author = {Brew, Anthony and Greene, Derek and Cunningham, Pádraig}, booktitle = {Proceedings of the 19th European Conference on Artificial Intelligence}, editor = {Coelho, Helder and Studer, Rudi and Wooldridge, Michael}, interhash = {90650749ea1084b729710d37b5865b72}, intrahash = {9643e3c5729886b0b4e85cb3d3d704f5}, isbn = {978-1-60750-605-8}, numpages = {6}, pages = {145--150}, publisher = {IOS Press}, series = {Frontiers in Artificial Intelligence and Applications}, title = {Using Crowdsourcing and Active Learning to Track Sentiment in Online Media}, url = {http://dl.acm.org/citation.cfm?id=1860967.1860997}, volume = 215, year = 2010 } @inproceedings{dong2009overview, author = {Dong, Xishuang and Chen, Xiaodong and Guan, Yi and Yu, Zhiming and Li, Sheng}, booktitle = {CSIE (3)}, crossref = {conf/csie/2009}, editor = {Burgin, Mark and Chowdhury, Masud H. and Ham, Chan H. and Ludwig, Simone A. 
and Su, Weilian and Yenduri, Sumanth}, ee = {http://doi.ieeecomputersociety.org/10.1109/CSIE.2009.1090}, interhash = {038285e30e929088afad8d82c066ef75}, intrahash = {d970cfabe05f5e19100099afa11b9873}, isbn = {978-0-7695-3507-4}, pages = {600-606}, publisher = {IEEE Computer Society}, title = {An Overview of Learning to Rank for Information Retrieval.}, url = {http://dblp.uni-trier.de/db/conf/csie/csie2009-3.html#DongCGYL09}, year = 2009 } @inproceedings{ls_leimeister, address = {Orlando Florida, USA}, author = {Wegener, René and Menschner, Philipp and Leimeister, Jan Marco}, booktitle = {Proceedings of the International Conference on Information Systems (ICIS)}, interhash = {353a397ecd7bbefe81ac577b07deb13a}, intrahash = {fa79180c575e20231e48a8c588d892fd}, note = 323, title = {Design and evaluation of a didactical service blueprinting method for large scale lectures}, url = {http://pubs.wi-kassel.de/wp-content/uploads/2013/04/JML_323.pdf}, year = 2012 } @inproceedings{ls_leimeister, address = {Seattle, Washington, USA}, author = {Bitzer, Philipp and Lehmann, Katja and Leimeister, Jan Marco}, booktitle = {Proceedings of the Eighteenth Americas Conference on Information Systems (AMCIS)}, interhash = {6f0cee07b5b7122a38a74c26ef10b2c9}, intrahash = {cb229ff577cd5c2e04f382f20d9c9678}, note = 340, title = {A Literature Review on the Indicators for the Measurement of Technology Mediated Learning Productivity}, url = {http://pubs.wi-kassel.de/wp-content/uploads/2013/03/JML_376.pdf}, year = 2012 } @article{raykar2010learning, abstract = {For many supervised learning tasks it may be infeasible (or very expensive) to obtain objective and reliable labels. Instead, we can collect subjective (possibly noisy) labels from multiple experts or annotators. In practice, there is a substantial amount of disagreement among the annotators, and hence it is of great practical interest to address conventional supervised learning problems in this scenario. 
In this paper we describe a probabilistic approach for supervised learning when we have multiple annotators providing (possibly noisy) labels but no absolute gold standard. The proposed algorithm evaluates the different experts and also gives an estimate of the actual hidden labels. Experimental results indicate that the proposed method is superior to the commonly used majority voting baseline.}, acmid = {1859894}, author = {Raykar, Vikas C. and Yu, Shipeng and Zhao, Linda H. and Valadez, Gerardo Hermosillo and Florin, Charles and Bogoni, Luca and Moy, Linda}, interhash = {8113daf47997fddf48e4c6c79f2eba56}, intrahash = {14220abe8babfab01c0cdd5ebd5e4b7c}, issn = {1532-4435}, issue_date = {3/1/2010}, journal = {Journal of Machine Learning Research}, month = aug, numpages = {26}, pages = {1297--1322}, publisher = {JMLR.org}, title = {Learning From Crowds}, url = {http://dl.acm.org/citation.cfm?id=1756006.1859894}, volume = 11, year = 2010 } @inproceedings{bullock2011tagging, author = {Bullock, Beate Navarro and Jäschke, Robert and Hotho, Andreas}, booktitle = {Proceedings of the ACM WebSci'11}, interhash = {7afaa67dfeb07f7e0b85abf2be61aff1}, intrahash = {493e03868a98f498628cad31f9320e9f}, month = {June}, title = {Tagging data as implicit feedback for learning-to-rank}, url = {http://journal.webscience.org/463/}, year = 2011 } @inproceedings{tane04semantic, author = {Tane, Julien and Schmitz, Christoph and Stumme, Gerd}, bibsource = {DBLP, http://dblp.uni-trier.de}, booktitle = {Proc. 13th International World Wide Web Conference (WWW 2004)}, ee = {http://doi.acm.org/10.1145/1013369}, interhash = {92089bd1e3e798b50fe7c82cd8333e7b}, intrahash = {10efb9940c6f2881cd6c84b6c52bebd8}, pages = {1-10}, title = {Semantic resource management for the web: an e-learning application}, url = {http://www.www2004.org/proceedings/docs/2p1.pdf}, year = 2004 } @article{ls_leimeister, author = {Wegener, R. and Prinz, A. and Leimeister, J. 
M.}, interhash = {afa79c9ef12668f6401e9a1c472335de}, intrahash = {b55bbb0a4aaaafa369508f0833e34e25}, journal = {Hamburger eLearning-Magazin}, note = {216 (61-10)}, number = 5, title = {Interaktiv, überall & jederzeit - wie Tablets und Netbooks die Lehre verändern können}, url = {http://www.uni-kassel.de/fb7/ibwl/leimeister/pub/JML_216.pdf}, year = 2010 } @inproceedings{ls_leimeister, address = {Kaiserslautern, Germany (to appear)}, author = {Wegener, R. and Prinz, A. and Leimeister, J. M.}, booktitle = {6. Konferenz Mobile und Ubiquitäre Informationssysteme (MMS)}, interhash = {fc28ada102ca1f53a8be700a4290a434}, intrahash = {fd9e2a76812879924f9ec35c652ff867}, note = {218 (04-11) }, number = 6, title = {Entwicklung innovativer, mobiler Lernanwendungen für den Einsatz in Massenveranstaltungen}, year = 2011 } @inproceedings{DBLP:conf/pkdd/ADHSS11, author = {Scholz, Christoph and Doerfel, Stephan and Atzmueller, Martin and Hotho, Andreas and Stumme, Gerd}, interhash = {d81c55cdcdf8ee331595bbb4d6fd51d6}, intrahash = {c1614b434eb13f0f42884ccffae8141d}, title = {Resource-Aware On-Line RFID Localization Using Proximity Data}, year = 2011 } @incollection{solskinnsbakk2010hybrid, abstract = {Folksonomies are becoming increasingly popular. They contain large amounts of data which can be mined and utilized for many tasks like visualization, browsing, information retrieval etc. An inherent problem of folksonomies is the lack of structure. In this paper we present an unsupervised approach for generating such structure based on a combination of association rule mining and the underlying tagged material. Using the underlying tagged material we generate a semantic representation of each tag. The semantic representation of the tags is an integral component of the structure generated. 
The experiment presented in this paper shows promising results with tag structures that correspond well with human judgment.}, address = {Berlin / Heidelberg}, affiliation = {Department of Computer and Information Science, Norwegian University of Science and Technology, Trondheim, Norway}, author = {Solskinnsbakk, Geir and Gulla, Jon}, booktitle = {On the Move to Meaningful Internet Systems, OTM 2010}, doi = {10.1007/978-3-642-16949-6_22}, editor = {Meersman, Robert and Dillon, Tharam and Herrero, Pilar}, interhash = {c33c0fe08d8ac29e88a4c43b3047c707}, intrahash = {949d497bc5a29eda10c77f5784aed18b}, isbn = {978-3-642-16948-9}, keyword = {Computer Science}, pages = {975-982}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, slides = {http://www.slides.com}, title = {A Hybrid Approach to Constructing Tag Hierarchies}, url = {http://dx.doi.org/10.1007/978-3-642-16949-6_22}, volume = 6427, year = 2010 } @inproceedings{plangprasopchok2010probabilistic, abstract = {Learning structured representations has emerged as an important problem in many domains, including document and Web data mining, bioinformatics, and image analysis. One approach to learning complex structures is to integrate many smaller, incomplete and noisy structure fragments. In this work, we present an unsupervised probabilistic approach that extends affinity propagation to combine the small ontological fragments into a collection of integrated, consistent, and larger folksonomies. This is a challenging task because the method must aggregate similar structures while avoiding structural inconsistencies and handling noise. We validate the approach on a real-world social media dataset, comprised of shallow personal hierarchies specified by many individual users, collected from the photosharing website Flickr. 
Our empirical results show that our proposed approach is able to construct deeper and denser structures, compared to an approach using only the standard affinity propagation algorithm. Additionally, the approach yields better overall integration quality than a state-of-the-art approach based on incremental relational clustering. }, author = {Plangprasopchok, Anon and Lerman, Kristina and Getoor, Lise}, booktitle = {Proceedings of the 4th ACM Web Search and Data Mining Conference}, interhash = {826359ec25dcd228ad3ef46dcc6d26c5}, intrahash = {455bb173bb33af58bc8aaed48d8a8513}, note = {cite arxiv:1011.3557Comment: In Proceedings of the 4th ACM Web Search and Data Mining Conference (WSDM)}, title = {A Probabilistic Approach for Learning Folksonomies from Structured Data}, url = {http://arxiv.org/abs/1011.3557}, year = 2010 } @book{Brown.2004, abstract = {A real revolution is taking place in the way in which we conceptualise and practise education and learning. This book sets out to explore the immense impact which digital technology is having on education around the world and the ways in which it is used by a wide range of individuals and communities. Contributors analyse changes in technology such as e-mail, the Internet, digital video and other media, but also the effect of this new technology on the way people live and learn around the world. Cultural changes taking place range from the blurring of boundaries between formal and informal learning to the development of new 'virtual communities' which revolve around particular social or cultural interests, and which serve as a crucial tool and source of identity for spatially displaced communities such as refugees. Digital technology is changing the way we all live, and this book is an authoritative study of these changes in all their diversity. Liesbeth de Block, Institute of Education, University of London, UK Erica C. 
Boling, Rutgers University, USA Bart Bonamie, Ghent University, Belgium Alain Breuleux, McGill University, Canada}, address = {London, UK}, editor = {Brown, A. and Davis, N.}, interhash = {92adbb969a48a405c72c0b3a0344a107}, intrahash = {6d5eae2492e07fd46f54a68c7e071725}, publisher = {Routledge Falmer}, title = {World Yearbook of Education 2004: Digital Technologies, Communities and Education}, year = 2004 } @book{Cooch.2010, abstract = {The book looks at the main functional areas of Moodle that have significant new features, explains the new features and how to use them. It draws attention to significant differences from how things used to behave, and gives the reader an idea of the kind of consequences these changes will bring to them. If you are an existing Moodle user, tutor, or administrator, then this book is for you. You are expected to be familiar with the operation of Moodle.}, author = {Cooch, M.}, interhash = {6db5f7aa65992e77929d30cff574c13d}, intrahash = {c589adf919d892a8b425716fc4bfde44}, publisher = {Packt Publishing}, title = {Moodle 2.0 First Look}, year = 2010 } @book{Horn.2008, address = {New York}, author = {Christensen, C. and Johnson, C. W. and Horn, M. B.}, interhash = {46f88ab8631dda67b33c6f782ef32ba5}, intrahash = {b07252a5bd1699e979a9d3a734bd08bf}, note = {ITEG_233 288}, publisher = {McGraw-Hill}, title = {Disrupting Class: How Disruptive Innovation Will Change the Way the World Learns}, year = 2008 } @inproceedings{zhou2008unsupervised, abstract = {This paper deals with the problem of exploring hierarchical semantics from social annotations. Recently, social annotationservices have become more and more popular in Semantic Web. It allows users to arbitrarily annotate web resources, thus, largelylowers the barrier to cooperation. Furthermore, through providing abundant meta-data resources, social annotation might becomea key to the development of Semantic Web. 
However, on the other hand, social annotation has its own apparent limitations, for instance, 1) ambiguity and synonym phenomena and 2) lack of hierarchical information. In this paper, we propose an unsupervised model to automatically derive hierarchical semantics from social annotations. Using a social bookmark service Del.icio.us as example, we demonstrate that the derived hierarchical semantics has the ability to compensate those shortcomings. We further apply our model on another data set from Flickr to testify our model’s applicability on different environments. The experimental results demonstrate our model’s efficiency.},
  author = {Zhou, Mianwei and Bao, Shenghua and Wu, Xian and Yu, Yong},
  booktitle = {The Semantic Web},
  doi = {10.1007/978-3-540-76298-0_49},
  file = {zhou2008unsupervised.pdf:zhou2008unsupervised.pdf:PDF},
  groups = {public},
  interhash = {e8397fd51d43531b91e81776c879f487},
  intrahash = {ee6da1cc1300cf4fb68fc58d5e2bb819},
  pages = {680--693},
  timestamp = {2009-09-24 23:27:32},
  title = {An Unsupervised Model for Exploring Hierarchical Semantics from Social Annotations},
  url = {http://dx.doi.org/10.1007/978-3-540-76298-0_49},
  username = {dbenz},
  year = 2008
}

@article{Hazman:30May2009:1744-2621:24,
  abstract = {Ontologies play a vital role in many web- and internet-related applications. This work presents a system for accelerating the ontology building process via semi-automatically learning a hierarchal ontology given a set of domain-specific web documents and a set of seed concepts. The methods are tested with web documents in the domain of agriculture. The ontology is constructed through the use of two complementary approaches. The presented system has been used to build an ontology in the agricultural domain using a set of Arabic extension documents and evaluated against a modified version of the AGROVOC ontology.},
  author = {Hazman, Maryam and El-Beltagy, Samhaa R. 
and Rafea, Ahmed},
  doi = {10.1504/IJMSO.2009.026251},
  interhash = {fe27d687bcba91a7a6fe51eec9a2b87d},
  intrahash = {323c8bdedc8a4643232a498ac03d6407},
  journal = {International Journal of Metadata, Semantics and Ontologies},
  pages = {24--33},
  title = {Ontology learning from domain specific web documents},
  url = {http://www.ingentaconnect.com/content/ind/ijmso/2009/00000004/F0020001/art00003},
  volume = 4,
  year = 2009
}

@article{ryu2009toward,
  abstract = {This paper describes new thesaurus construction method in which class-based, small size thesauruses are constructed and merged as a whole based on domain classification system. This method has advantages in that 1) taxonomy construction complexity is reduced, 2) each class-based thesaurus can be reused in other domain thesaurus, and 3) term distribution per classes in target domain is easily identified. The method is composed of three steps: term extraction step, term classification step, and taxonomy construction step. All steps are balanced approaches of automatic processing and manual verification. We constructed Korean IT domain thesaurus based on proposed method. Because terms are extracted from Korean newspaper and patent corpus in IT domain, the thesaurus includes many Korean neologisms. The thesaurus consists of 81 upper level classes and over 1,000 IT terms.},
  author = {Ryu, P. M. and Kim, J. H. and Nam, Y. and Huang, J. X. and Shin, S. and Lee, S. M. 
and Choi, K. S.},
  file = {ryu2009toward.pdf:ryu2009toward.pdf:PDF},
  groups = {public},
  interhash = {33037e9884a62f1994c9d45eb68c27e7},
  intrahash = {bd4f375366e49a3eb31e60b268dca01c},
  journal = {Relation},
  journalpub = {1},
  number = {1.129},
  pages = 7396,
  publisher = {Citeseer},
  timestamp = {2010-11-09 12:05:09},
  title = {Toward Domain Specific Thesaurus Construction: Divide-and-Conquer Method},
  url = {http://scholar.google.de/scholar.bib?q=info:4K_xIsqmea0J:scholar.google.com/&output=citation&hl=de&as_sdt=2000&ct=citation&cd=9},
  username = {dbenz},
  volume = 10,
  year = 2009
}

@inproceedings{curran2002improvements,
  acmid = {1118635},
  address = {Morristown, NJ, USA},
  author = {Curran, James R. and Moens, Marc},
  booktitle = {Proceedings of the ACL-02 workshop on Unsupervised lexical acquisition - Volume 9},
  doi = {10.3115/1118627.1118635},
  interhash = {60f02af9c8ba2985c007546411edc529},
  intrahash = {dbdc195432099ba86c6fb91e56ce3811},
  location = {Philadelphia, Pennsylvania},
  numpages = {8},
  pages = {59--66},
  publisher = {Association for Computational Linguistics},
  title = {Improvements in automatic thesaurus extraction},
  url = {http://dx.doi.org/10.3115/1118627.1118635},
  year = 2002
}

@article{lux2008from,
  abstract = {Is Web 2.0 just hype or just a buzzword, which might disappear in the near future? One way to find answers to these questions is to investigate the actual benefit of the Web 2.0 for real use cases. Within this contribution we study a very special aspect of the Web 2.0 the folksonomy and its use within self-directed learning. 
Guided by conceptual principles of emergent computing we point out methods, which might be able to let semantics emerge from folksonomies and discuss the effect of the results in self-directed learning.},
  author = {Lux, Mathias and Dösinger, Gisela},
  doi = {10.1504/IJKL.2007.016709},
  groups = {public},
  interhash = {5dde7a91231320f96c0c4b3e7ba9a503},
  intrahash = {dd5cdcc6449d97622033bbebcd4d1874},
  journal = {International Journal of Knowledge and Learning},
  journalpub = {1},
  month = jan,
  number = {4-5},
  pages = {515--528},
  timestamp = {2010-08-11 07:26:38},
  title = {From folksonomies to ontologies: employing wisdom of the crowds to serve learning purposes},
  url = {http://www.ingentaconnect.com/content/ind/ijkl/2008/00000003/F0020004/art00009},
  username = {dbenz},
  volume = 3,
  year = 2008
}

@article{zhou2008unsupervisedb,
  internal-note = {Second record of the work already stored under the key zhou2008unsupervised; key suffixed so the file no longer contains a repeated entry},
  abstract = {This paper deals with the problem of exploring hierarchical semantics from social annotations. Recently, social annotation services have become more and more popular in Semantic Web. It allows users to arbitrarily annotate web resources, thus, largely lowers the barrier to cooperation. Furthermore, through providing abundant meta-data resources, social annotation might become a key to the development of Semantic Web. However, on the other hand, social annotation has its own apparent limitations, for instance, 1) ambiguity and synonym phenomena and 2) lack of hierarchical information. In this paper, we propose an unsupervised model to automatically derive hierarchical semantics from social annotations. Using a social bookmark service Del.icio.us as example, we demonstrate that the derived hierarchical semantics has the ability to compensate those shortcomings. We further apply our model on another data set from Flickr to testify our model’s applicability on different environments. 
The experimental results demonstrate our model’s efficiency.},
  author = {Zhou, Mianwei and Bao, Shenghua and Wu, Xian and Yu, Yong},
  doi = {10.1007/978-3-540-76298-0_49},
  file = {zhou2008unsupervised.pdf:zhou2008unsupervised.pdf:PDF},
  interhash = {e8397fd51d43531b91e81776c879f487},
  intrahash = {ee6da1cc1300cf4fb68fc58d5e2bb819},
  journal = {The Semantic Web},
  pages = {680--693},
  title = {An Unsupervised Model for Exploring Hierarchical Semantics from Social Annotations},
  url = {http://dx.doi.org/10.1007/978-3-540-76298-0_49},
  year = 2008
}

@article{cimiano2006ontologies,
  abstract = {Ontologies are nowadays used for many applications requiring data, services and resources in general to be interoperable and machine understandable. Such applications are for example web service discovery and composition, information integration across databases, intelligent search, etc. The general idea is that data and services are semantically described with respect to ontologies, which are formal specifications of a domain of interest, and can thus be shared and reused in a way such that the shared meaning specified by the ontology remains formally the same across different parties and applications. As the cost of creating ontologies is relatively high, different proposals have emerged for learning ontologies from structured and unstructured resources. 
In this article we examine the maturity of techniques for ontology learning from textual resources, addressing the question whether the state-of-the-art is mature enough to produce ontologies ‘on demand’.},
  author = {Cimiano, Philipp and Völker, Johanna and Studer, Rudi},
  file = {cimiano2006ontologies.pdf:cimiano2006ontologies.pdf:PDF},
  groups = {public},
  interhash = {aeb553dc2e190f0a5974dfdc709d450a},
  intrahash = {fe4c2950b5be221b493e29e4339240e8},
  journal = {Information, Wissenschaft und Praxis},
  journalpub = {1},
  month = oct,
  note = {see the special issue for more contributions related to the Semantic Web},
  number = {6-7},
  pages = {315--320},
  timestamp = {2008-07-23 11:47:29},
  title = {Ontologies on Demand? - A Description of the State-of-the-Art, Applications, Challenges and Trends for Ontology Learning from Text},
  url = {http://www.aifb.uni-karlsruhe.de/WBS/pci/Publications/iwp06.pdf},
  username = {dbenz},
  volume = 57,
  year = 2006
}

@inproceedings{marinho2008folksonomybased,
  abstract = {The growing popularity of social tagging systems promises to alleviate the knowledge bottleneck that slows down the full materialization of the Semantic Web since these systems allow ordinary users to create and share knowledge in a simple, cheap, and scalable representation, usually known as folksonomy. However, for the sake of knowledge workflow, one needs to find a compromise between the uncontrolled nature of folksonomies and the controlled and more systematic vocabulary of domain experts. In this paper we propose to address this concern by devising a method that automatically enriches a folksonomy with domain expert knowledge and by introducing a novel algorithm based on frequent itemset mining techniques to efficiently learn an ontology over the enriched folksonomy. 
In order to quantitatively assess our method, we propose a new benchmark for task-based ontology evaluation where the quality of the ontologies is measured based on how helpful they are for the task of personalized information finding. We conduct experiments on real data and empirically show the effectiveness of our approach.},
  author = {Marinho, Leandro Balby and Buza, Krisztian and Schmidt-Thieme, Lars},
  booktitle = {International Semantic Web Conference},
  crossref = {conf/semweb/2008},
  date = {2008-10-24},
  doi = {10.1007/978-3-540-88564-1_17},
  editor = {Sheth, Amit P. and Staab, Steffen and Dean, Mike and Paolucci, Massimo and Maynard, Diana and Finin, Timothy W. and Thirunarayan, Krishnaprasad},
  ee = {http://dx.doi.org/10.1007/978-3-540-88564-1_17},
  file = {marinho2008folksonomybased.pdf:marinho2008folksonomybased.pdf:PDF},
  groups = {public},
  interhash = {d295e7d4615500c670e70ad240fada29},
  intrahash = {cfa4c4520d4cf02e03dd3b84bb5c9578},
  isbn = {978-3-540-88563-4},
  pages = {261--276},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  timestamp = {2010-03-30 16:14:58},
  title = {Folksonomy-Based Collabulary Learning},
  url = {http://dblp.uni-trier.de/db/conf/semweb/iswc2008.html#MarinhoBS08},
  username = {dbenz},
  volume = 5318,
  year = 2008
}

@inproceedings{rattenbury2007towards,
  abstract = {We describe an approach for extracting semantics of tags, unstructured text-labels assigned to resources on the Web, based on each tag's usage patterns. In particular, we focus on the problem of extracting place and event semantics for tags that are assigned to photos on Flickr, a popular photo sharing website that supports time and location (latitude/longitude) metadata. We analyze two methods inspired by well-known burst-analysis techniques and one novel method: Scale-structure Identification. We evaluate the methods on a subset of Flickr data, and show that our Scale-structure Identification method outperforms the existing techniques. 
The approach and methods described in this work can be used in other domains such as geo-annotated web pages, where text terms can be extracted and associated with usage patterns.},
  address = {New York, NY, USA},
  author = {Rattenbury, Tye and Good, Nathaniel and Naaman, Mor},
  booktitle = {SIGIR '07: Proceedings of the 30th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
  doi = {10.1145/1277741.1277762},
  file = {rattenbury2007towards.pdf:rattenbury2007towards.pdf:PDF},
  groups = {public},
  interhash = {8b02d2b3fdbb97c3db6e3b23079a56e5},
  intrahash = {bf6f73d2ef74ca6f1d355fb5688b673c},
  isbn = {978-1-59593-597-7},
  pages = {103--110},
  publisher = {ACM Press},
  timestamp = {2010-11-10 15:35:25},
  title = {Towards automatic extraction of event and place semantics from {Flickr} tags},
  url = {http://dx.doi.org/10.1145/1277741.1277762},
  username = {dbenz},
  year = 2007
}

@inproceedings{silva2009semiautomatic,
  abstract = {This paper introduces WikiOnto: a system that assists in the extraction and modeling of topic ontologies in a semi-automatic manner using a preprocessed document corpus derived from Wikipedia. Based on the Wikipedia XML Corpus, we present a three-tiered framework for extracting topic ontologies in quick time and a modeling environment to refine these ontologies. Using natural language processing (NLP) and other machine learning (ML) techniques along with a very rich document corpus, this system proposes a solution to a task that is generally considered extremely cumbersome. The initial results of the prototype suggest strong potential of the system to become highly successful in ontology extraction and modeling and also inspire further research on extracting ontologies from other semi-structured document corpora as well.},
  author = {De Silva, L. and Jayaratne, L.},
  booktitle = {Applications of Digital Information and Web Technologies, 2009. ICADIWT '09. 
Second International Conference on the},
  doi = {10.1109/ICADIWT.2009.5273871},
  file = {silva2009semiautomatic.pdf:silva2009semiautomatic.pdf:PDF},
  groups = {public},
  interhash = {c1996cb9e69de56e2bb2f8e763fe0482},
  intrahash = {66bec053541e521fbe68c0119806ae49},
  month = aug,
  pages = {446--451},
  timestamp = {2010-02-23 12:54:40},
  title = {Semi-automatic extraction and modeling of ontologies using {Wikipedia} {XML} Corpus},
  url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?isnumber=5273826&arnumber=5273871&count=156&index=116},
  username = {dbenz},
  year = 2009
}

@inproceedings{tane2003courseware,
  abstract = {Topics in education are changing with an ever faster pace. E-Learning resources tend to be more and more decentralised. Users need increasingly to be able to use the resources of the web. For this, they should have tools for finding and organizing information in a decentral way. In this paper, we show how an ontology-based toolsuite allows to make the most of the resources available on the web.},
  author = {Tane, Julien and Schmitz, Christoph and Stumme, Gerd and Staab, Steffen and Studer, R.},
  booktitle = {Mobiles Lernen und Forschen - Beiträge der Fachtagung an der Universität},
  editor = {David, Klaus and Wegner, Lutz},
  file = {tane2003courseware.pdf:tane2003courseware.pdf:PDF},
  groups = {public},
  interhash = {7f33080bb78d089b24bf51c059f8f018},
  intrahash = {850949481723b7dd03768ccd96b25cb9},
  month = nov,
  pages = {93--104},
  publisher = {Kassel University Press},
  timestamp = {2010-11-10 15:35:25},
  title = {The Courseware Watchdog: an Ontology-based tool for finding and organizing learning material},
  url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2003/tane2003courseware.pdf},
  username = {dbenz},
  year = 2003
}

@inproceedings{wu2009learning,
  citeulike-linkout-0 = {http://portal.acm.org/citation.cfm?id=1526709.1526758},
  citeulike-linkout-1 = {http://dx.doi.org/10.1145/1526709.1526758},
  abstract = {Social tagging provides valuable and crucial information for large-scale web image retrieval. 
It is ontology-free and easy to obtain; however, irrelevant tags frequently appear, and users typically will not tag all semantic objects in the image, which is also called semantic loss. To avoid noises and compensate for the semantic loss, tag recommendation is proposed in literature. However, current recommendation simply ranks the related tags based on the single modality of tag co-occurrence on the whole dataset, which ignores other modalities, such as visual correlation. This paper proposes a multi-modality recommendation based on both tag and visual correlation, and formulates the tag recommendation as a learning problem. Each modality is used to generate a ranking feature, and Rankboost algorithm is applied to learn an optimal combination of these ranking features from different modalities. Experiments on Flickr data demonstrate the effectiveness of this learning-based multi-modality recommendation strategy.},
  address = {New York, NY, USA},
  at = {2009-04-23 17:01:03},
  author = {Wu, Lei and Yang, Linjun and Yu, Nenghai and Hua, Xian S.},
  booktitle = {WWW '09: Proceedings of the 18th international conference on World wide web},
  doi = {10.1145/1526709.1526758},
  file = {wu2009learning.pdf:wu2009learning.pdf:PDF},
  groups = {public},
  interhash = {8389ee83e70d619168c6e52bf499742d},
  intrahash = {e58e20189ca9601b33007479478fbefe},
  isbn = {978-1-60558-487-4},
  location = {Madrid, Spain},
  misc_id = {4387938},
  pages = {361--370},
  priority = {0},
  publisher = {ACM},
  timestamp = {2011-02-02 15:26:27},
  title = {Learning to tag},
  url = {http://dx.doi.org/10.1145/1526709.1526758},
  username = {dbenz},
  year = 2009
}

@inproceedings{conf/ecai/HjelmB08,
  author = {Hjelm, Hans and Buitelaar, Paul},
  booktitle = {ECAI},
  crossref = {conf/ecai/2008},
  editor = {Ghallab, Malik and Spyropoulos, Constantine D. 
and Fakotakis, Nikos and Avouris, Nikolaos M.},
  doi = {10.3233/978-1-58603-891-5-288},
  ee = {http://dx.doi.org/10.3233/978-1-58603-891-5-288},
  interhash = {21a658154fb1a02e773b7a678b15f9f4},
  intrahash = {813903a333a40ecf9a59ded552acb323},
  isbn = {978-1-58603-891-5},
  pages = {288--292},
  publisher = {IOS Press},
  series = {Frontiers in Artificial Intelligence and Applications},
  title = {Multilingual Evidence Improves Clustering-based Taxonomy Extraction},
  url = {http://www.ling.su.se/staff/hans/artiklar/ecai2008-hjelm-buitelaar.pdf},
  volume = 178,
  year = 2008
}

@incollection{cimiano2009ontology,
  abstract = {Ontology learning techniques serve the purpose of supporting an ontology engineer in the task of creating and maintaining an ontology. In this chapter, we present a comprehensive and concise introduction to the field of ontology learning. We present a generic architecture for ontology learning systems and discuss its main components. In addition, we introduce the main problems and challenges addressed in the field and give an overview of the most important methods applied. We conclude with a brief discussion of advanced issues which pose interesting challenges to the state-of-the-art.},
  author = {Cimiano, P. and Mädche, A. and Staab, S. and Völker, J.},
  booktitle = {Handbook on Ontologies},
  edition = {2nd revised edition},
  editor = {Staab, S. and Studer, R.},
  interhash = {5387f28040285a086ab706bc33e7d7af},
  intrahash = {f9f8bb0af1a8a514c270f83237313ac7},
  pages = {245--267},
  publisher = {Springer},
  series = {International Handbooks on Information Systems},
  title = {Ontology Learning},
  url = {http://www.uni-koblenz.de/~staab/Research/Publications/2009/handbookEdition2/ontology-learning-handbook2.pdf},
  year = 2009
}

@inproceedings{ls_leimeister,
  address = {Kaiserslautern, Germany},
  author = {Wegener, R. and Prinz, A. and Leimeister, J. M.},
  booktitle = {6. 
Konferenz Mobile und Ubiquitäre Informationssysteme (MMS)},
  interhash = {fc28ada102ca1f53a8be700a4290a434},
  intrahash = {fd9e2a76812879924f9ec35c652ff867},
  note = {218 (04-11) },
  number = 6,
  title = {Entwicklung innovativer, mobiler {Lernanwendungen} für den {Einsatz} in {Massenveranstaltungen}},
  url = {http://pubs.wi-kassel.de/wp-content/uploads/2013/03/JML_256.pdf},
  year = 2011
}

@article{wegener2010interaktiv,
  internal-note = {Previously stored under the key ls_leimeister, which is also used by the 2011 conference paper of the same authors; renamed so the file no longer contains a repeated entry},
  author = {Wegener, R. and Prinz, A. and Leimeister, J. M.},
  interhash = {afa79c9ef12668f6401e9a1c472335de},
  intrahash = {b55bbb0a4aaaafa369508f0833e34e25},
  journal = {Hamburger eLearning-Magazin},
  note = {216 (61-10)},
  number = 5,
  title = {Interaktiv, überall \& jederzeit - wie {Tablets} und {Netbooks} die {Lehre} verändern können},
  url = {http://pubs.wi-kassel.de/wp-content/uploads/2013/03/JML_184.pdf},
  year = 2010
}

@article{lux2008folksonomies,
  abstract = {Is Web 2.0 just hype or just a buzzword, which might disappear in the near future? One way to find answers to these questions is to investigate the actual benefit of the Web 2.0 for real use cases. Within this contribution we study a very special aspect of the Web 2.0 the folksonomy and its use within self-directed learning. 
Guided by conceptual principles of emergent computing we point out methods, which might be able to let semantics emerge from folksonomies and discuss the effect of the results in self-directed learning.},
  author = {Lux, Mathias and Dösinger, Gisela},
  doi = {10.1504/IJKL.2007.016709},
  interhash = {5dde7a91231320f96c0c4b3e7ba9a503},
  intrahash = {dd5cdcc6449d97622033bbebcd4d1874},
  journal = {International Journal of Knowledge and Learning},
  month = jan,
  number = {4-5},
  pages = {515--528},
  title = {From folksonomies to ontologies: employing wisdom of the crowds to serve learning purposes},
  url = {http://www.ingentaconnect.com/content/ind/ijkl/2008/00000003/F0020004/art00009},
  volume = 3,
  year = 2008
}

@inproceedings{rendle2009learning,
  abstract = {Tag recommendation is the task of predicting a personalized list of tags for a user given an item. This is important for many websites with tagging capabilities like last.fm or delicious. In this paper, we propose a method for tag recommendation based on tensor factorization (TF). In contrast to other TF methods like higher order singular value decomposition (HOSVD), our method RTF ('ranking with tensor factorization') directly optimizes the factorization model for the best personalized ranking. RTF handles missing values and learns from pairwise ranking constraints. Our optimization criterion for TF is motivated by a detailed analysis of the problem and of interpretation schemes for the observed data in tagging systems. In all, RTF directly optimizes for the actual problem using a correct interpretation of the data. We provide a gradient descent algorithm to solve our optimization problem. We also provide an improved learning and prediction method with runtime complexity analysis for RTF. The prediction runtime of RTF is independent of the number of observations and only depends on the factorization dimensions. 
Besides the theoretical analysis, we empirically show that our method outperforms other state-of-the-art tag recommendation methods like FolkRank, PageRank and HOSVD both in quality and prediction runtime.},
  address = {New York, NY, USA},
  author = {Rendle, Steffen and Marinho, Leandro Balby and Nanopoulos, Alexandros and Schmidt-Thieme, Lars},
  booktitle = {KDD '09: Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining},
  doi = {10.1145/1557019.1557100},
  interhash = {1cc85ca2ec82db2a3caf40fd1795a58a},
  intrahash = {1bd672ffb8d6ba5589bb0c7deca09412},
  isbn = {978-1-60558-495-9},
  location = {Paris, France},
  pages = {727--736},
  publisher = {ACM},
  title = {Learning optimal ranking with tensor factorization for tag recommendation},
  url = {http://portal.acm.org/citation.cfm?doid=1557019.1557100},
  year = 2009
}

@article{journals/kais/GuoV08,
  author = {Guo, Hongyu and Viktor, Herna L.},
  doi = {10.1007/s10115-008-0127-5},
  ee = {http://dx.doi.org/10.1007/s10115-008-0127-5},
  interhash = {430e66171506726f2478939162b789b6},
  intrahash = {35bee198e0b131ead007dd91d794ab78},
  journal = {Knowl. Inf. Syst.},
  number = 3,
  pages = {287--312},
  timestamp = {2010-02-02},
  title = {Multirelational classification: a multiple view approach},
  url = {http://dblp.uni-trier.de/db/journals/kais/kais17.html#GuoV08},
  volume = 17,
  year = 2008
}

@inproceedings{conf/cikm/ZhangTLW07,
  author = {Zhang, Duo and Tang, Jie and Li, Juan-Zi and Wang, Kehong},
  booktitle = {CIKM},
  crossref = {conf/cikm/2007},
  date = {2007-12-10},
  editor = {Silva, Mário J. and Laender, Alberto H. F. and Baeza-Yates, Ricardo A. and McGuinness, Deborah L. 
and Olstad, Bjørn and Olsen, Øystein Haug and Falcão, André O.},
  doi = {10.1145/1321440.1321600},
  ee = {http://doi.acm.org/10.1145/1321440.1321600},
  interhash = {c46bb587aa7f5ca30e77e257e2f3c5b0},
  intrahash = {210a9f932dc4c454e8a84ca6eb4906d8},
  isbn = {978-1-59593-803-9},
  pages = {1019--1022},
  publisher = {ACM},
  title = {A constraint-based probabilistic framework for name disambiguation},
  url = {http://dblp.uni-trier.de/db/conf/cikm/cikm2007.html#ZhangTLW07},
  year = 2007
}

@article{citeulike:1220636,
  abstract = {In our work the traditional bipartite model of ontologies is extended with the social dimension, leading to a tripartite model of actors, concepts and instances. We demonstrate the application of this representation by showing how community-based semantics emerges from this model through a process of graph transformation. We illustrate ontology emergence by two case studies, an analysis of a large scale folksonomy system and a novel method for the extraction of community-based ontologies from Web pages.},
  author = {Mika, Peter},
  booktitle = {Selected Papers from the International Semantic Web Conference, International Semantic Web Conference (ISWC2005)},
  citeulike-article-id = {1220636},
  doi = {10.1016/j.websem.2006.11.002},
  interhash = {5bba04607af19c94d2438ae13f362649},
  intrahash = {13eb27ecb7ed77655f08adefe6186ea5},
  journal = {Web Semantics: Science, Services and Agents on the World Wide Web},
  month = mar,
  number = 1,
  pages = {5--15},
  posted-at = {2008-11-18 14:30:12},
  priority = {5},
  title = {Ontologies are us: A unified model of social networks and semantics},
  url = {http://dx.doi.org/10.1016/j.websem.2006.11.002},
  volume = 5,
  year = 2007
}

@inproceedings{schmitz06-inducing,
  abstract = {In this paper, we describe some promising initial results in inducing ontology from the Flickr tag vocabulary, using a subsumption-based model. We describe the utility of faceted ontology as a supplement to a tagging system and present our model and results. 
We propose a revised, probabilistic model using seed ontologies to induce faceted ontology, and describe how the model can integrate into the logistics of tagging communities.},
  address = {Edinburgh, Scotland},
  author = {Schmitz, Patrick},
  booktitle = {Proceedings of the Workshop on Collaborative Tagging at WWW2006},
  interhash = {1335f4ef87f951e6edf4fd94f885d3a2},
  intrahash = {f913a4ad3a27582ae5d4d269fe38dc5c},
  lastdatemodified = {2006-10-12},
  lastname = {Schmitz},
  month = may,
  own = {own},
  pdf = {schmitz06-inducing.pdf},
  read = {readnext},
  title = {Inducing Ontology from {Flickr} Tags},
  url = {http://www.citeulike.org/user/ryanshaw/article/740688},
  year = 2006
}

@inproceedings{schmitz2006mining,
  address = {Berlin, Heidelberg},
  author = {Schmitz, Christoph and Hotho, Andreas and Jäschke, Robert and Stumme, Gerd},
  booktitle = {Data Science and Classification: Proc. of the 10th IFCS Conf.},
  editor = {Batagelj, V. and Bock, H.-H. and Ferligoj, A. and {\v Z}iberna, A.},
  interhash = {20650d852ca3b82523fcd8b63e7c12d7},
  intrahash = {1e79a0f1c79561073d14434adce1e890},
  pages = {261--270},
  publisher = {Springer},
  series = {Studies in Classification, Data Analysis, and Knowledge Organization},
  title = {Mining Association Rules in Folksonomies},
  year = 2006
}

@inproceedings{brewster2007dio,
  author = {Brewster, C. and Iria, J. and Zhang, Z. and Ciravegna, F. and Guthrie, L. and Wilks, Y.},
  booktitle = {Recent Advances in Natural Language Processing (RANLP 07)},
  interhash = {2718d2549472483902eb05db48b4eee9},
  intrahash = {0ca836cd4474b773b7329fd65770049d},
  title = {Dynamic iterative ontology learning},
  year = 2007
}

@article{iria2006itp,
  author = {Iria, J. and Brewster, C. and Ciravegna, F. and Wilks, Y.},
  interhash = {092944c0f8d3a0ac6ff55ff0c49f478c},
  intrahash = {6f7371abe4beb8fd57b733710d90a202},
  journal = {Proc. 
of LREC2006, Genoa},
  title = {An incremental tri-partite approach to ontology learning},
  year = 2006
}

@book{jordan-learning-98,
  editor = {Jordan, M.},
  interhash = {dca14c475ead34e75711dfe8bb911d96},
  intrahash = {101d8938173add30b69dd1f4872e6eb7},
  publisher = {MIT Press},
  title = {Learning in Graphical Models},
  year = 1998
}

@book{gilks1996markov,
  author = {Gilks, W. R. and Spiegelhalter, D. J.},
  interhash = {152f39e8e21e5da1545e74b32b6c4e76},
  intrahash = {5c410a2edd204cd117776f6d7f2fea5f},
  publisher = {Chapman \& Hall/CRC},
  title = {{Markov} Chain {Monte Carlo} in Practice},
  url = {http://scholar.google.de/scholar.bib?q=info:AN5YKWErdFAJ:scholar.google.com/&output=citation&hl=de&ct=citation&cd=0},
  year = 1996
}

@inproceedings{1103922,
  abstract = {Sophisticated computer graphics applications require complex models of appearance, motion, natural phenomena, and even artistic style. Such models are often difficult or impossible to design by hand. Recent research demonstrates that, instead, we can "learn" a dynamical and/or appearance model from captured data, and then synthesize realistic new data from the model. For example, we can capture the motions of a human actor and then generate new motions as they might be performed by that actor. Bayesian reasoning is a fundamental tool of machine learning and statistics, and it provides powerful tools for solving otherwise-difficult problems of learning about the world from data. 
Beginning from first principles, this course develops the general methodologies for designing learning algorithms and describes their application to several problems in graphics.},
  address = {New York, NY, USA},
  author = {Hertzmann, Aaron},
  booktitle = {SIGGRAPH '04: ACM SIGGRAPH 2004 Course Notes},
  doi = {10.1145/1103900.1103922},
  interhash = {261f4139fd56f371c7ff828b5f2d6df8},
  intrahash = {b3e68fb8932bb0cca51bc56a36857bf7},
  location = {Los Angeles, CA},
  pages = 22,
  publisher = {ACM},
  title = {Introduction to Bayesian learning},
  url = {http://portal.acm.org/citation.cfm?id=1103900.1103922},
  year = 2004
}

@techreport{jordan2003,
  author = {Jordan, Michael I.},
  interhash = {979f98f68a4b6057a4242f7d432c6762},
  intrahash = {a71abfadd0bb0e2f0e0ff8f6b98c27b6},
  title = {Learning in graphical models},
  year = 2003
}

@inproceedings{LA:09,
  author = {Lemmerich, Florian and Atzmueller, Martin},
  booktitle = {Proc. LeGo-09: From Local Patterns to Global Models, Workshop at the 2009 European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases},
  editor = {F{\"u}rnkranz, Johannes and Knobbe, Arno},
  interhash = {b3b0f2ecf02d4b58d787b29fdfdbe8e0},
  intrahash = {0c6a9543bb5381a57baf8837db7b4249},
  note = {accepted},
  title = {Incorporating Exceptions: Efficient Mining of epsilon-Relevant Subgroup Patterns},
  year = 2009
}

@inproceedings{tane04semantic,
  author = {Tane, Julien and Schmitz, Christoph and Stumme, Gerd},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  booktitle = {Proc. 
13th International World Wide Web Conference (WWW 2004)},
  ee = {http://doi.acm.org/10.1145/1013369},
  interhash = {92089bd1e3e798b50fe7c82cd8333e7b},
  intrahash = {10efb9940c6f2881cd6c84b6c52bebd8},
  pages = {1--10},
  title = {Semantic resource management for the web: an e-learning application},
  url = {http://www.www2004.org/proceedings/docs/2p1.pdf},
  year = 2004
}

@inproceedings{plangprasopchok2009,
  abstract = {Automatic folksonomy construction from tags has attracted much attention recently. However, inferring hierarchical relations between concepts from tags has a drawback in that it is difficult to distinguish between more popular and more general concepts. Instead of tags we propose to use user-specified relations for learning folksonomy. We explore two statistical frameworks for aggregating many shallow individual hierarchies, expressed through the collection/set relations on the social photosharing site Flickr, into a common deeper folksonomy that reflects how a community organizes knowledge. Our approach addresses a number of challenges that arise while aggregating information from diverse users, namely noisy vocabulary, and variations in the granularity level of the concepts expressed. Our second contribution is a method for automatically evaluating learned folksonomy by comparing it to a reference taxonomy, e.g., the Web directory created by the Open Directory Project. Our empirical results suggest that user-specified relations are a good source of evidence for learning folksonomies.},
  address = {New York, NY, USA},
  author = {Plangprasopchok, A. 
and Lerman, K.},
  booktitle = {WWW '09: Proceedings of the 18th international conference on World wide web},
  doi = {10.1145/1526709.1526814},
  interhash = {fccd894a82edb040d7438d6da91e3ebe},
  intrahash = {559ee9d48f1a510f56765b2357aa8ea5},
  isbn = {978-1-60558-487-4},
  location = {Madrid, Spain},
  pages = {781--790},
  publisher = {ACM},
  title = {Constructing folksonomies from user-specified relations on flickr},
  url = {http://www2009.org/proceedings/pdf/p781.pdf},
  year = 2009
}

% Book chapter. The umlaut is written as a BibTeX special character so the
% surname sorts and abbreviates correctly.
@incollection{Voelker2008,
  author = {V{\"o}lker, J. and Haase, P. and Hitzler, P.},
  booktitle = {Ontology Learning and Population: Bridging the Gap between Text and Knowledge},
  interhash = {cf638e52ae5c4f9af9d35d8faee72c16},
  intrahash = {f80af45b8659db1a4327a5ce1df3f267},
  owner = {blev},
  publisher = {IOS Press},
  series = {Frontiers in Artificial Intelligence and Applications},
  timestamp = {2009.02.22},
  title = {Learning Expressive Ontologies},
  year = 2008
}

% WikiOnto paper (ICADIWT '09). The first author's surname is the compound "De Silva".
@inproceedings{5273871,
  abstract = {This paper introduces WikiOnto: a system that assists in the extraction and modeling of topic ontologies in a semi-automatic manner using a preprocessed document corpus derived from Wikipedia. Based on the Wikipedia XML Corpus, we present a three-tiered framework for extracting topic ontologies in quick time and a modeling environment to refine these ontologies. Using natural language processing (NLP) and other machine learning (ML) techniques along with a very rich document corpus, this system proposes a solution to a task that is generally considered extremely cumbersome. The initial results of the prototype suggest strong potential of the system to become highly successful in ontology extraction and modeling and also inspire further research on extracting ontologies from other semi-structured document corpora as well.},
  author = {De Silva, L. and Jayaratne, L.},
  booktitle = {Applications of Digital Information and Web Technologies, 2009. ICADIWT '09. 
Second International Conference on the},
  doi = {10.1109/ICADIWT.2009.5273871},
  interhash = {c1996cb9e69de56e2bb2f8e763fe0482},
  intrahash = {66bec053541e521fbe68c0119806ae49},
  month = aug,
  pages = {446--451},
  title = {Semi-automatic extraction and modeling of ontologies using Wikipedia XML Corpus},
  url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?isnumber=5273826&arnumber=5273871&count=156&index=116},
  year = 2009
}

% Journal article (Information, Wissenschaft und Praxis 57, 2006). The publication
% month is stored as a month macro next to year instead of a free-text date.
% NOTE(review): entry 1282 in this file appears to describe the same article - confirm before merging.
@article{ieKey,
  author = {Cimiano, Philipp and V{\"o}lker, Johanna and Studer, Rudi},
  interhash = {0007185516cf6c93931a11bc13c55f3f},
  intrahash = {bd70c98a41d8cc01464dd022dfd118b6},
  journal = {Information, Wissenschaft und Praxis},
  month = oct,
  number = {6-7},
  pages = {315--320},
  title = {Ontologies on Demand? - A Description of the State-of-the-Art, Applications, Challenges and Trends for Ontology Learning from Text Information},
  url = {http://www.aifb.uni-karlsruhe.de/Publikationen/showPublikation?publ_id=1282},
  volume = 57,
  year = 2006
}

% BMC Bioinformatics article on building an ontology from text for the animal behaviour domain.
@article{Brewster:2009:BMC-Bioinformatics:19426458,
  abstract = {BACKGROUND: Ontology construction for any domain is a labour intensive and complex process. Any methodology that can reduce the cost and increase efficiency has the potential to make a major impact in the life sciences. This paper describes an experiment in ontology construction from text for the animal behaviour domain. Our objective was to see how much could be done in a simple and relatively rapid manner using a corpus of journal papers. We used a sequence of pre-existing text processing steps, and here describe the different choices made to clean the input, to derive a set of terms and to structure those terms in a number of hierarchies. We describe some of the challenges, especially that of focusing the ontology appropriately given a starting point of a heterogeneous corpus. 
RESULTS: Using mainly automated techniques, we were able to construct an 18055 term ontology-like structure with 73% recall of animal behaviour terms, but a precision of only 26%. We were able to clean unwanted terms from the nascent ontology using lexico-syntactic patterns that tested the validity of term inclusion within the ontology. We used the same technique to test for subsumption relationships between the remaining terms to add structure to the initially broad and shallow structure we generated. All outputs are available at http://thirlmere.aston.ac.uk/\~kiffer/animalbehaviour/. CONCLUSION: We present a systematic method for the initial steps of ontology or structured vocabulary construction for scientific domains that requires limited human effort and can make a contribution both to ontology learning and maintenance. The method is useful both for the exploration of a scientific domain and as a stepping stone towards formally rigourous ontologies. The filtering of recognised terms from a heterogeneous corpus to focus upon those that are the topic of the ontology is identified to be one of the main challenges for research in ontology learning.},
  author = {Brewster, C and Jupp, S and Luciano, J and Shotton, D and Stevens, R D and Zhang, Z},
  doi = {10.1186/1471-2105-10-S5-S1},
  interhash = {f4b4e74631a837df6c3d102731ec46c3},
  intrahash = {e9a83a729df52557d560ad98404774c3},
  journal = {BMC Bioinformatics},
  pmid = {19426458},
  title = {Issues in learning an ontology from text},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/19426458},
  volume = {10 Suppl 5},
  year = 2009
}

% Thesis published by VDM Verlag. The empty page range placeholder is not stored.
% NOTE(review): phdthesis entries require a school field; none is recorded for this entry - confirm the awarding institution.
@phdthesis{david2007domain,
  address = {Saarbrücken},
  author = {Sánchez, David},
  interhash = {997c9d66a7ef8844f410919ccecc4768},
  intrahash = {504c0b73b391933fb0536b135144ae1d},
  isbn = {9783836470698 3836470691},
  publisher = {VDM Verlag Dr. 
Müller},
  refid = {426144281},
  title = {Domain ontology learning from the web: an unsupervised, automatic and domain independent approach},
  url = {http://www.worldcat.org/search?qt=worldcat_org_all&q=3836470691},
  year = 2007
}

% OntoLearn article. The double-issue month is built from the standard month
% macros, and the journal is given by its proper name rather than the inverted index form.
@article{1179190,
  abstract = { Our OntoLearn system is an infrastructure for automated ontology learning from domain text. It is the only system, as far as we know, that uses natural language processing and machine learning techniques, and is part of a more general ontology engineering architecture. We describe the system and an experiment in which we used a machine-learned tourism ontology to automatically translate multiword terms from English to Italian. The method can apply to other domains without manual adaptation.},
  author = {Navigli, R. and Velardi, P. and Gangemi, A.},
  doi = {10.1109/MIS.2003.1179190},
  interhash = {a3f49b0b169c0e2b6ea1e3ed7e87f957},
  intrahash = {d70653a1a21b9e84904def9d2fdb5151},
  issn = {1541-1672},
  journal = {IEEE Intelligent Systems},
  month = jan # "--" # feb,
  number = 1,
  pages = {22--31},
  title = {Ontology learning and its application to automated terminology translation},
  url = {http://ieeexplore.ieee.org/search/wrapper.jsp?arnumber=1179190},
  volume = 18,
  year = 2003
}

% NLDB '02 paper on user-centred ontology learning.
@inproceedings{666125,
  address = {London, UK},
  author = {Brewster, Christopher and Ciravegna, Fabio and Wilks, Yorick},
  booktitle = {NLDB '02: Proceedings of the 6th International Conference on Applications of Natural Language to Information Systems-Revised Papers},
  interhash = {7311591ca9cf74ff3d0817507a18f315},
  intrahash = {331564c7d3891041c1024591532a45ec},
  isbn = {3-540-00307-X},
  pages = {203--207},
  publisher = {Springer-Verlag},
  title = {User-Centred Ontology Learning for Knowledge Management},
  url = {http://portal.acm.org/citation.cfm?id=666125},
  year = 2002
}

% LOAIT workshop paper (record continues on the following physical line).
@inproceedings{conf/icail/LenciMPV07,
  author = {Lenci, Alessandro and Montemagni, Simonetta and Pirrelli, Vito and Venturi, Giulia},
  booktitle = {LOAIT},
  crossref = {conf/icail/2007loait},
  date 
= {2008-06-06},
  editor = {Casanovas, Pompeu and Biasiotti, Maria Angela and Francesconi, Enrico and Sagri, Maria-Teresa},
  ee = {http://ceur-ws.org/Vol-321/paper07.pdf},
  interhash = {5649c87c6612fc0df9031536e6fe6d55},
  intrahash = {f113eb70fed0141d87672429cb27bba3},
  pages = {113--129},
  publisher = {CEUR-WS.org},
  series = {CEUR Workshop Proceedings},
  title = {NLP-based Ontology Learning from Legal Texts. A Case Study.},
  url = {http://dblp.uni-trier.de/db/conf/icail/loait2007.html#LenciMPV07},
  volume = 321,
  year = 2007
}

% Workshop paper surveying ontology learning approaches for the Web.
@inproceedings{Ome01,
  author = {Omelayenko, Borys},
  booktitle = {Proceedings of the International Workshop on Web Dynamics, held in conj. with the 8th International Conference on Database Theory (ICDT’01), London, UK},
  interhash = {011d45b904b02fdf1a65122d2832710b},
  intrahash = {3edf80da8b39eefeea46379581628adf},
  title = {Learning of Ontologies for the Web: the Analysis of Existent Approaches},
  url = {http://www.dcs.bbk.ac.uk/webDyn/webDynPapers/omelayenko.pdf},
  year = 2001
}

% SIGMOD Record article describing the LSD schema-matching system.
@article{375731,
  abstract = {A data-integration system provides access to a multitude of data sources through a single mediated schema. A key bottleneck in building such systems has been the laborious manual construction of semantic mappings between the source schemas and the mediated schema. We describe LSD, a system that employs and extends current machine-learning techniques to semi-automatically find such mappings. LSD first asks the user to provide the semantic mappings for a small set of data sources, then uses these mappings together with the sources to train a set of learners. Each learner exploits a different type of information either in the source schemas or in their data. Once the learners have been trained, LSD finds semantic mappings for a new data source by applying the learners, then combining their predictions using a meta-learner. 
To further improve matching accuracy, we extend machine learning techniques so that LSD can incorporate domain constraints as an additional source of knowledge, and develop a novel learner that utilizes the structural information in XML documents. Our approach thus is distinguished in that it incorporates multiple types of knowledge. Importantly, its architecture is extensible to additional learners that may exploit new kinds of information. We describe a set of experiments on several real-world domains, and show that LSD proposes semantic mappings with a high degree of accuracy.},
  address = {New York, NY, USA},
  author = {Doan, AnHai and Domingos, Pedro and Halevy, Alon Y.},
  doi = {10.1145/376284.375731},
  interhash = {1550f1948858bf8b315ea2fc6ed789cd},
  intrahash = {29e7660361ca79b97b00e5db51fb66ee},
  issn = {0163-5808},
  journal = {SIGMOD Rec.},
  number = 2,
  pages = {509--520},
  publisher = {ACM},
  title = {Reconciling schemas of disparate data sources: a machine-learning approach},
  url = {http://portal.acm.org/citation.cfm?id=375731&dl=GUIDE&coll=GUIDE&CFID=75153142&CFTOKEN=89522229},
  volume = 30,
  year = 2001
}

% ICCS 2007 paper. The month uses the standard macro and the page range an en-dash.
@inproceedings{conf/Rudolph07,
  address = {Berlin, Heidelberg},
  author = {Rudolph, Sebastian and Völker, Johanna and Hitzler, Pascal},
  booktitle = {Proceedings of the 15th International Conference on Conceptual Structures (ICCS 2007)},
  crossref = {conf/iccs/2006},
  editor = {Priss, Uta and Polovina, Simon and Hill, Richard},
  interhash = {95939c2e69ef57fcf65e93df6010fe60},
  intrahash = {06b7dbf2f1ae4a442bb1559c499dae16},
  isbn = {3-540-73680-8},
  month = jul,
  pages = {488--491},
  publisher = {Springer-Verlag},
  series = {Lecture Notes in Artificial Intelligence},
  title = {Supporting Lexical Ontology Learning by Relational Exploration},
  url = {http://www.aifb.uni-karlsruhe.de/WBS/phi/resources/publications/iccs07-relexp.pdf},
  volume = 4604,
  year = 2007
}

% OntoWeb deliverable surveying ontology learning methods.
@techreport{Gomez-Perez_OntoWeb03,
  author = {{G{\'o}mez-P{\'e}rez}, Asuncion and Manzano-Macho, 
David},
  file = {Gomez-Perez_OntoWeb03.pdf:Gomez_Perez/Gomez-Perez_OntoWeb03.pdf:PDF},
  institution = {OntoWeb Consortium},
  interhash = {8ee5304684f3b0974890a7427c2438ae},
  intrahash = {6b56e7f1d2b3913be8a04a09c6d566c1},
  number = {1.5},
  title = {A survey of ontology learning methods and techniques},
  type = {Deliverable},
  url = {http://www.deri.at/fileadmin/documents/deliverables/Ontoweb/D1.5.pdf},
  year = 2003
}

% IJCAI'09 paper. The second author's given name is hyphenated (Ho-fung) so that
% "fung" is not parsed as a lowercase name particle.
@inproceedings{1661779,
  abstract = {A folksonomy refers to a collection of user-defined tags with which users describe contents published on the Web. With the flourish of Web 2.0, folksonomies have become an important mean to develop the Semantic Web. Because tags in folksonomies are authored freely, there is a need to understand the structure and semantics of these tags in various applications. In this paper, we propose a learning approach to create an ontology that captures the hierarchical semantic structure of folksonomies. Our experimental results on two different genres of real world data sets show that our method can effectively learn the ontology structure from the folksonomies.},
  address = {San Francisco, CA, USA},
  author = {Tang, Jie and Leung, Ho-fung and Luo, Qiong and Chen, Dewei and Gong, Jibin},
  booktitle = {IJCAI'09: Proceedings of the 21st international joint conference on Artificial intelligence},
  interhash = {17f95a6ba585888cf45443926d8b7e98},
  intrahash = {7b335f08a288a79eb70eff89f1ec7630},
  location = {Pasadena, California, USA},
  pages = {2089--2094},
  publisher = {Morgan Kaufmann Publishers Inc.},
  title = {Towards ontology learning from folksonomies},
  url = {http://ijcai.org/papers09/Papers/IJCAI09-344.pdf},
  year = 2009
}

% SIGIR '07 paper on extracting place and event semantics from Flickr tags.
@inproceedings{rattenbury2007towards,
  abstract = {We describe an approach for extracting semantics of tags, unstructured text-labels assigned to resources on the Web, based on each tag's usage patterns. 
In particular, we focus on the problem of extracting place and event semantics for tags that are assigned to photos on Flickr, a popular photo sharing website that supports time and location (latitude/longitude) metadata. We analyze two methods inspired by well-known burst-analysis techniques and one novel method: Scale-structure Identification. We evaluate the methods on a subset of Flickr data, and show that our Scale-structure Identification method outperforms the existing techniques. The approach and methods described in this work can be used in other domains such as geo-annotated web pages, where text terms can be extracted and associated with usage patterns.},
  address = {New York, NY, USA},
  author = {Rattenbury, Tye and Good, Nathaniel and Naaman, Mor},
  booktitle = {SIGIR '07: Proceedings of the 30th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
  doi = {10.1145/1277741.1277762},
  interhash = {8b02d2b3fdbb97c3db6e3b23079a56e5},
  intrahash = {bf6f73d2ef74ca6f1d355fb5688b673c},
  isbn = {978-1-59593-597-7},
  pages = {103--110},
  publisher = {ACM Press},
  title = {Towards automatic extraction of event and place semantics from flickr tags},
  url = {http://dx.doi.org/10.1145/1277741.1277762},
  year = 2007
}

% Paper comparing tag relatedness measures in social bookmarking systems.
@inproceedings{cattuto2008semantic,
  abstract = {Social bookmarking systems allow users to organise collections of resources on the Web in a collaborative fashion. The increasing popularity of these systems as well as first insights into their emergent semantics have made them relevant to disciplines like knowledge extraction and ontology learning. The problem of devising methods to measure the semantic relatedness between tags and characterizing it semantically is still largely open. Here we analyze three measures of tag relatedness: tag co-occurrence, cosine similarity of co-occurrence distributions, and FolkRank, an adaptation of the PageRank algorithm to folksonomies. 
Each measure is computed on tags from a large-scale dataset crawled from the social bookmarking system del.icio.us. To provide a semantic grounding of our findings, a connection to WordNet (a semantic lexicon for the English language) is established by mapping tags into synonym sets of WordNet, and applying there well-known metrics of semantic similarity. Our results clearly expose different characteristics of the selected measures of relatedness, making them applicable to different subtasks of knowledge extraction such as synonym detection or discovery of concept hierarchies.},
  address = {Patras, Greece},
  author = {Cattuto, Ciro and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  booktitle = {Proceedings of the 3rd Workshop on Ontology Learning and Population (OLP3)},
  interhash = {cc62b733f6e0402db966d6dbf1b7711f},
  intrahash = {3b0aca61b24e4343bd80390614e3066e},
  isbn = {978-960-89282-6-8},
  month = jul,
  pages = {39--43},
  title = {Semantic Analysis of Tag Similarity Measures in Collaborative Tagging Systems},
  url = {http://olp.dfki.de/olp3/},
  year = 2008
}

% Authored contribution inside a collected volume, so it is typed as incollection:
% the containing book goes in booktitle and the book series in series.
% NOTE(review): series name expanded from the abbreviated "Lecture Notes in AI" - confirm against the publisher record.
@incollection{30474,
  author = {Tresp, Volker and Bundschus, Markus and Rettinger, Achim and Huang, Yi},
  booktitle = {Uncertainty Reasoning for the Semantic Web I},
  interhash = {e27fbf5b5fb16f66cd0c7a3932fc4695},
  intrahash = {006468688804bc3563225b8dcd7aea97},
  publisher = {Springer},
  series = {Lecture Notes in Artificial Intelligence},
  title = {Towards machine learning on the semantic web},
  url = {http://wwwbrauer.informatik.tu-muenchen.de/~trespvol/papers/LearningRDF23.pdf},
  year = 2008
}

% GrC 2005 paper on k-nearest-neighbour multi-label classification.
@inproceedings{conf/grc/ZhangZ05b,
  author = {Zhang, Min-Ling and Zhou, Zhi-Hua},
  booktitle = {GrC},
  crossref = {conf/grc/2005},
  date = {2007-03-22},
  editor = {Hu, Xiaohua and Liu, Qing and Skowron, Andrzej and Lin, Tsau Young and Yager, Ronald R. 
and Zhang, Bo},
  ee = {http://doi.ieeecomputersociety.org/10.1109/GRC.2005.1547385},
  interhash = {3c58a80457442249a999a2ceea877565},
  intrahash = {3460603745790c4824a9bdb572e64777},
  isbn = {0-7803-9017-2},
  pages = {718--721},
  publisher = {IEEE},
  title = {A k-nearest neighbor based algorithm for multi-label classification.},
  url = {http://cs.nju.edu.cn/zhouzh/zhouzh.files/publication/grc05.pdf},
  year = 2005
}

% Position paper on inducing tag hierarchies from folksonomy data.
@inproceedings{benz07ontology,
  abstract = {The emergence of collaborative tagging systems with their underlying flat and uncontrolled resource organization paradigm has led to a large number of research activities focussing on a formal description and analysis of the resulting "folksonomies". An interesting outcome is that the characteristic qualities of these systems seem to be inverse to more traditional knowledge structuring approaches like taxonomies or ontologies: The latter provide rich and precise semantics, but suffer - amongst others - from a knowledge acquisition bottleneck. An important step towards exploiting the possible synergies by bridging the gap between both paradigms is the automatic extraction of relations between tags in a folksonomy. This position paper presents preliminary results of ongoing work to induce hierarchical relationships among tags by analyzing the aggregated data of collaborative tagging systems as a basis for an ontology learning procedure. 
},
  address = {Halle/Saale},
  author = {Benz, Dominik and Hotho, Andreas},
  booktitle = {Workshop Proceedings of Lernen - Wissensentdeckung - Adaptivität (LWA 2007)},
  editor = {Hinneburg, Alexander},
  interhash = {ff7de5717f771dabd764675279ff3adf},
  intrahash = {72bff5ebe5dfb5023f62ba9b94e6ed01},
  isbn = {978-3-86010-907-6},
  month = sep,
  pages = {109--112},
  publisher = {Martin-Luther-Universität Halle-Wittenberg},
  title = {Position Paper: Ontology Learning from Folksonomies},
  url = {http://lwa07.informatik.uni-halle.de/kdml07/kdml07.htm},
  year = 2007
}

% Thesis at the University of Kassel; the type field overrides the default thesis label.
@mastersthesis{illig2008machine,
  address = {Kassel},
  author = {Illig, Jens},
  interhash = {65c16443f45ffd46175f68d14b4f809a},
  intrahash = {9a65067da65e8301182b33b4ae292141},
  school = {University of Kassel},
  title = {Machine Learnability Analysis of Textclassifications in a Social Bookmarking Folksonomy},
  type = {Bachelor Thesis},
  year = 2008
}

% Overview article on multi-label classification.
@article{citeulike:2146554,
  abstract = {Nowadays, multi-label classification methods are increasingly required by modern applications, such as protein function classification, music categorization and semantic scene classification. This paper introduces the task of multi-label classification, organizes the sparse related literature into a structured presentation and performs comparative experimental results of certain multi-label classification methods. It also contributes the definition of concepts for the quantification of the multi-label nature of a data set.},
  author = {Tsoumakas, G. 
and Katakis, I.},
  citeulike-article-id = {2146554},
  editor = {Taniar, David},
  interhash = {f8e6c4b6b3df7461d070a1a9cc1d15c1},
  intrahash = {52c3b18481f5146e4c213d609c1143fc},
  journal = {International Journal of Data Warehouse and Mining},
  number = 3,
  pages = {1--13},
  posted-at = {2007-12-19 13:38:29},
  priority = {2},
  publisher = {Idea Group Publishing},
  title = {Multi Label Classification: An Overview},
  volume = 3,
  year = 2007
}

% A second, field-for-field identical record with key illig2008machine stood here.
% It is not repeated because BibTeX reports a repeated citation key as an error;
% the key stays available through the earlier record in this file.

% ACL-44 paper. The DOI is stored bare, without a resolver prefix.
@inproceedings{1220215,
  address = {Morristown, NJ, USA},
  author = {Turney, Peter D.},
  booktitle = {ACL-44: Proceedings of the 21st International Conference on Computational Linguistics and the 44th annual meeting of the Association for Computational Linguistics},
  doi = {10.3115/1220175.1220215},
  interhash = {999a7acd6d1752f04fc1b4536e6d13b2},
  intrahash = {bbe58563bc09558f2ef97ec181b74358},
  location = {Sydney, Australia},
  pages = {313--320},
  publisher = {Association for Computational Linguistics},
  title = {Expressing implicit semantic relations without supervision},
  url = {http://portal.acm.org/citation.cfm?id=1220215},
  year = 2006
}

% Review article on ontology learning. The abstract text is cleaned of extraction
% artefacts (leading "Abstract" label, words fused at former line breaks).
@article{keyhere,
  abstract = {Ontology is one of the fundamental cornerstones of the semantic Web. The pervasive use of ontologies in information sharing and knowledge management calls for efficient and effective approaches to ontology development. Ontology learning, which seeks to discover ontological knowledge from various forms of data automatically or semi-automatically, can overcome the bottleneck of ontology acquisition in ontology development. 
Despite the significant progress in ontology learning research over the past decade, there remain a number of open problems in this field. This paper provides a comprehensive review and discussion of major issues, challenges, and opportunities in ontology learning. We propose a new learning-oriented model for ontology development and a framework for ontology learning. Moreover, we identify and discuss important dimensions for classifying ontology learning approaches and techniques. In light of the impact of domain on choosing ontology learning approaches, we summarize domain characteristics that can facilitate future ontology learning effort. The paper offers a road map and a variety of insights about this fast-growing field.},
  author = {Zhou, Lina},
  interhash = {78b6d3db998dcd27c475dfff3816f48f},
  intrahash = {95b0f4f7c9c628e032d8bb4c69b432ed},
  journal = {Information Technology and Management},
  month = sep,
  number = 3,
  pages = {241--252},
  title = {Ontology learning: state of the art and open issues},
  url = {http://dx.doi.org/10.1007/s10799-007-0019-5},
  volume = 8,
  year = 2007
}

% JMLR article; the page range uses an en-dash.
@article{journals/jmlr/AndoZ05,
  author = {Ando, Rie Kubota and Zhang, Tong},
  date = {2007-02-21},
  ee = {http://www.jmlr.org/papers/v6/ando05a.html},
  interhash = {9eb1e2f4692380af430abc5f2e70382b},
  intrahash = {d467130ff1cb750316fd8e5ff6c0abb8},
  journal = {Journal of Machine Learning Research},
  pages = {1817--1853},
  title = {A Framework for Learning Predictive Structures from Multiple Tasks and Unlabeled Data.},
  url = {http://jmlr.csail.mit.edu/papers/volume6/ando05a/ando05a.pdf},
  volume = 6,
  year = 2005
}

% ICDM 2007 paper on text classification with Wikipedia background knowledge.
@inproceedings{PuWang:2007,
  abstract = {The exponential growth of text documents available on the Internet has created an urgent need for accurate, fast, and general purpose text classification algorithms. However, the "bag of words" representation used for these classification methods is often unsatisfactory as it ignores relationships between important terms that do not co-occur literally. 
In order to deal with this problem, we integrate background knowledge - in our application: Wikipedia - into the process of classifying text documents. The experimental evaluation on Reuters newsfeeds and several other corpus shows that our classification results with encyclopedia knowledge are much better than the baseline "bag of words" methods.},
  author = {Wang, Pu and Hu, Jian and Zeng, Hua-Jun and Chen, Lijun and Chen, Zheng},
  booktitle = {Data Mining, 2007. ICDM 2007. Seventh IEEE International Conference on},
  doi = {10.1109/ICDM.2007.77},
  interhash = {8a899b60047e20e162fc12b2ff6f8142},
  intrahash = {66058efbca5abd1222f72c32365d23fa},
  isbn = {978-0-7695-3018-5},
  issn = {1550-4786},
  pages = {332--341},
  title = {Improving Text Classification by Using Encyclopedia Knowledge},
  url = {ftp://ftp.computer.org/press/outgoing/proceedings/icdm07/Data/3018a332.pdf},
  year = 2007
}

% Preprint on reducing ranking to binary classification.
@misc{ailon-2007,
  abstract = { This paper describes an efficient reduction of the learning problem of ranking to binary classification. The reduction guarantees an average pairwise misranking regret of at most that of the binary classifier regret, improving a recent result of Balcan et al which only guarantees a factor of 2. Moreover, our reduction applies to a broader class of ranking loss functions, admits a simpler proof, and the expected running time complexity of our algorithm in terms of number of calls to a classifier or preference function is improved from $\Omega(n^2)$ to $O(n \log n)$. In addition, when the top $k$ ranked elements only are required ($k \ll n$), as in many applications in information extraction or search engines, the time complexity of our algorithm can be further reduced to $O(k \log k + n)$. Our reduction and algorithm are thus practical for realistic applications where the number of points to rank exceeds several thousands. 
Much of our results also extend beyond the bipartite case previously studied.},
  author = {Ailon, Nir and Mohri, Mehryar},
  interhash = {b102fea8a5381448d5b624aa2b82bc50},
  intrahash = {d8bd1b99e3c245d17b577514727ebff2},
  title = {An efficient reduction of ranking to classification},
  url = {http://www.citebase.org/abstract?id=oai:arXiv.org:0710.2889},
  year = 2007
}

% NLDB 2005 paper introducing Text2Onto. The url field holds the raw address;
% the bibliography style is responsible for wrapping it in a URL command.
@inproceedings{nldb05,
  address = {Alicante, Spain},
  author = {Cimiano, Philipp and Völker, Johanna},
  booktitle = {Proceedings of the 10th International Conference on Applications of Natural Language to Information Systems (NLDB)},
  editor = {Montoyo, Andres and Munoz, Rafael and Metais, Elisabeth},
  interhash = {c90cb094c9f4f3cca1214d0478ffeb07},
  intrahash = {072436e5adc4f5fdc39f4baeaa55b077},
  month = jun,
  pages = {227--238},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  title = {Text2Onto - A Framework for Ontology Learning and Data-driven Change Discovery},
  url = {http://www.aifb.uni-karlsruhe.de/WBS/jvo/publications/Text2Onto_nldb_2005.pdf},
  volume = 3513,
  year = 2005
}

% Journal article (Information, Wissenschaft und Praxis 57, 2006); raw url, en-dash page range.
% NOTE(review): entry ieKey in this file appears to describe the same article - confirm before merging.
@article{1282,
  author = {Cimiano, Philipp and Völker, Johanna and Studer, Rudi},
  interhash = {aeb553dc2e190f0a5974dfdc709d450a},
  intrahash = {fe4c2950b5be221b493e29e4339240e8},
  journal = {Information, Wissenschaft und Praxis},
  month = oct,
  note = {see the special issue for more contributions related to the Semantic Web},
  number = {6-7},
  pages = {315--320},
  title = {Ontologies on Demand? - A Description of the State-of-the-Art, Applications, Challenges and Trends for Ontology Learning from Text},
  url = {http://www.aifb.uni-karlsruhe.de/WBS/pci/Publications/iwp06.pdf},
  volume = 57,
  year = 2006
}

% Preprint record of the tag-similarity study (arXiv identifier appears in its url).
@misc{cattuto-2008,
  abstract = { Social bookmarking systems allow users to organise collections of resources on the Web in a collaborative fashion. 
The increasing popularity of these systems as well as first insights into their emergent semantics have made them relevant to disciplines like knowledge extraction and ontology learning. The problem of devising methods to measure the semantic relatedness between tags and characterizing it semantically is still largely open. Here we analyze three measures of tag relatedness: tag co-occurrence, cosine similarity of co-occurrence distributions, and FolkRank, an adaptation of the PageRank algorithm to folksonomies. Each measure is computed on tags from a large-scale dataset crawled from the social bookmarking system del.icio.us. To provide a semantic grounding of our findings, a connection to WordNet (a semantic lexicon for the English language) is established by mapping tags into synonym sets of WordNet, and applying there well-known metrics of semantic similarity. Our results clearly expose different characteristics of the selected measures of relatedness, making them applicable to different subtasks of knowledge extraction such as synonym detection or discovery of concept hierarchies.},
  author = {Cattuto, Ciro and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  interhash = {cc62b733f6e0402db966d6dbf1b7711f},
  intrahash = {78fd64c3db55e6387ebdeb6c40054542},
  title = {Semantic Analysis of Tag Similarity Measures in Collaborative Tagging Systems},
  url = {http://www.citebase.org/abstract?id=oai:arXiv.org:0805.2045},
  year = 2008
}

% AI Magazine article; the page range uses an en-dash.
@article{charniak97statistical,
  author = {Charniak, Eugene},
  interhash = {9f0c334b655cfa509f9862a0569cd375},
  intrahash = {1d02e8f9d663f5cd8203ec6685a958ed},
  journal = {AI Magazine},
  number = 4,
  pages = {33--44},
  title = {Statistical Techniques for Natural Language Parsing},
  url = {http://citeseer.ist.psu.edu/article/charniak97statistical.html},
  volume = 18,
  year = 1997
}