@article{375731,
  abstract  = {A data-integration system provides access to a multitude of data sources through a single mediated schema. A key bottleneck in building such systems has been the laborious manual construction of semantic mappings between the source schemas and the mediated schema. We describe LSD, a system that employs and extends current machine-learning techniques to semi-automatically find such mappings. LSD first asks the user to provide the semantic mappings for a small set of data sources, then uses these mappings together with the sources to train a set of learners. Each learner exploits a different type of information either in the source schemas or in their data. Once the learners have been trained, LSD finds semantic mappings for a new data source by applying the learners, then combining their predictions using a meta-learner. To further improve matching accuracy, we extend machine learning techniques so that LSD can incorporate domain constraints as an additional source of knowledge, and develop a novel learner that utilizes the structural information in XML documents. Our approach thus is distinguished in that it incorporates multiple types of knowledge. Importantly, its architecture is extensible to additional learners that may exploit new kinds of information. We describe a set of experiments on several real-world domains, and show that LSD proposes semantic mappings with a high degree of accuracy.},
  address   = {New York, NY, USA},
  author    = {Doan, AnHai and Domingos, Pedro and Halevy, Alon Y.},
  doi       = {10.1145/376284.375731},
  interhash = {1550f1948858bf8b315ea2fc6ed789cd},
  intrahash = {29e7660361ca79b97b00e5db51fb66ee},
  issn      = {0163-5808},
  journal   = {SIGMOD Record},
  number    = {2},
  pages     = {509--520},
  publisher = {ACM},
  title     = {Reconciling Schemas of Disparate Data Sources: A Machine-Learning Approach},
  url       = {http://portal.acm.org/citation.cfm?id=375731},
  volume    = {30},
  year      = {2001},
}

@misc{Medelyan2008,
  abstract      = {Wikipedia is a goldmine of information; not just for its many readers, but also for the growing community of researchers who recognize it as a resource of exceptional scale and utility. It represents a vast investment of manual effort and judgment: a huge, constantly evolving tapestry of concepts and relations that is being applied to a host of tasks. This article provides a comprehensive description of this work. It focuses on research that extracts and makes use of the concepts, relations, facts and descriptions found in Wikipedia, and organizes the work into four broad categories: applying Wikipedia to natural language processing; using it to facilitate information retrieval and information extraction; and as a resource for ontology building. The article addresses how Wikipedia is being used as is, how it is being improved and adapted, and how it is being combined with other structures to create entirely new resources. We identify the research groups and individuals involved, and how their work has developed in the last few years. We provide a comprehensive list of the open-source software they have produced. We also discuss the implications of this work for the long-awaited semantic web.},
  author        = {Medelyan, Olena and Legg, Catherine and Milne, David and Witten, Ian H.},
  eprint        = {0809.4530},
  archiveprefix = {arXiv},
  interhash     = {6614c7cd27d80abd691b2ef463941d1c},
  intrahash     = {0e7499a4f087f74ad0be674047cf315d},
  note          = {An extensive survey of re-using information in Wikipedia in natural language processing, information retrieval and extraction, and ontology building. Submitted.},
  title         = {Mining Meaning from {Wikipedia}},
  url           = {http://arxiv.org/abs/0809.4530},
  year          = {2008},
}