% Layout: one field per line, bibliographic fields first, then identifiers,
% then abstract and the BibSonomy hash fields.
% Citation keys are unchanged so existing citations keep resolving.

% LSD schema-matching paper as published in SIGMOD Record 30(2).
% The doi field holds the bare identifier (no resolver prefix).
@article{375731,
  author    = {Doan, AnHai and Domingos, Pedro and Halevy, Alon Y.},
  title     = {Reconciling schemas of disparate data sources: a machine-learning approach},
  journal   = {{SIGMOD} Record},
  volume    = 30,
  number    = 2,
  pages     = {509--520},
  year      = 2001,
  publisher = {ACM},
  address   = {New York, NY, USA},
  issn      = {0163-5808},
  doi       = {10.1145/376284.375731},
  url       = {http://portal.acm.org/citation.cfm?id=375731&dl=GUIDE&coll=GUIDE&CFID=75153142&CFTOKEN=89522229},
  abstract  = {A data-integration system provides access to a multitude of data sources through a single mediated schema.
               A key bottleneck in building such systems has been the laborious manual construction of semantic mappings between the source schemas and the mediated schema.
               We describe LSD, a system that employs and extends current machine-learning techniques to semi-automatically find such mappings.
               LSD first asks the user to provide the semantic mappings for a small set of data sources, then uses these mappings together with the sources to train a set of learners.
               Each learner exploits a different type of information either in the source schemas or in their data.
               Once the learners have been trained, LSD finds semantic mappings for a new data source by applying the learners, then combining their predictions using a meta-learner.
               To further improve matching accuracy, we extend machine learning techniques so that LSD can incorporate domain constraints as an additional source of knowledge, and develop a novel learner that utilizes the structural information in XML documents.
               Our approach thus is distinguished in that it incorporates multiple types of knowledge.
               Importantly, its architecture is extensible to additional learners that may exploit new kinds of information.
               We describe a set of experiments on several real-world domains, and show that LSD proposes semantic mappings with a high degree of accuracy.},
  interhash = {1550f1948858bf8b315ea2fc6ed789cd},
  intrahash = {29e7660361ca79b97b00e5db51fb66ee}
}

% Inherits proceedings data through crossref; the parent entry conf/icml/2003
% is expected to be defined later in the database.
% The timestamp field holds the value formerly stored under date, so it cannot
% be read as the publication date.
@inproceedings{conf/icml/RichardsonD03,
  author    = {Richardson, Matt and Domingos, Pedro},
  title     = {Learning with Knowledge from Multiple Experts},
  booktitle = {ICML},
  editor    = {Fawcett, Tom and Mishra, Nina},
  pages     = {624--631},
  publisher = {AAAI Press},
  year      = 2003,
  isbn      = {1-57735-189-4},
  url       = {http://dblp.uni-trier.de/db/conf/icml/icml2003.html#RichardsonD03},
  crossref  = {conf/icml/2003},
  timestamp = {2003-09-22},
  interhash = {506ac1f610b8ebf7a407f666e3753d8d},
  intrahash = {27e697483b19368404dd73945676a1df}
}

% Inherits proceedings data through crossref; the parent entry conf/sigmod/2004
% is expected to be defined later in the database.
% The timestamp field holds the value formerly stored under date (it disagreed
% with year). The doi is the bare form of the identifier kept in ee.
@inproceedings{conf/sigmod/LeeDDHD04,
  author    = {Dhamankar, Robin and Lee, Yoonkyong and Doan, AnHai and Halevy, Alon Y. and Domingos, Pedro},
  title     = {{iMAP}: Discovering Complex Mappings between Database Schemas},
  booktitle = {SIGMOD Conference},
  editor    = {Weikum, Gerhard and K{\"o}nig, Arnd Christian and De{\ss}loch, Stefan},
  pages     = {383--394},
  publisher = {ACM},
  year      = 2004,
  isbn      = {1-58113-859-8},
  doi       = {10.1145/1007568.1007612},
  ee        = {http://doi.acm.org/10.1145/1007568.1007612},
  url       = {http://www.cs.washington.edu/homes/pedrod/papers/sigmod04.pdf},
  crossref  = {conf/sigmod/2004},
  timestamp = {2005-02-21},
  interhash = {ec92ce1b40894b75fb11d45037a54a7c},
  intrahash = {fa94bc508e9954107e8603527b66dcb7}
}

% NOTE(review): booktitle was completed from a truncated value; no page range
% is recorded for this entry -- add one once verified.
@inproceedings{doan02learning,
  author    = {Doan, AnHai and Madhavan, Jayant and Domingos, Pedro and Halevy, Alon},
  title     = {Learning to Map between Ontologies on the {Semantic Web}},
  booktitle = {Proceedings of the Eleventh International World Wide Web Conference},
  address   = {Honolulu, Hawaii, USA},
  month     = may,
  year      = 2002,
  url       = {http://www.cs.washington.edu/homes/alon/site/files/glue.pdf},
  interhash = {7e4b3e995ef04182c07c14b5988777dc},
  intrahash = {b7a6ffa062893317d477dc8edcc17115}
}

% Place and publisher are stored in separate fields.
@inproceedings{domingos2003,
  author    = {Domingos, P. and Abe, Y. and Anderson, C. and Doan, A. and Fox, D. and Halevy, A. and Hulten, G. and Kautz, H. and Lau, T. and Liao, L. and Madhavan, J. and Mausam and Patterson, D. and Richardson, M. and Sanghai, S. and Weld, D. and Wolfman, S.},
  title     = {Research on Statistical Relational Learning at the {University of Washington}},
  booktitle = {Proceedings of the IJCAI-2003 Workshop on Learning Statistical Models from Relational Data},
  location  = {Acapulco, Mexico},
  publisher = {IJCAII},
  year      = 2003,
  url       = {http://www.cs.washington.edu/homes/pedrod/papers/srl03a.pdf},
  interhash = {2d2448903dc10789aef8e4238f0cc698},
  intrahash = {1a4c5b5688b4295ad19bd72295c25ba2}
}

% The booktitle is spelled out in full so the entry renders on its own.
% NOTE(review): the location value is kept as found; confirm it against the
% actual venue of the 2002 SIGKDD conference.
@inproceedings{Andersonetal02,
  author    = {Anderson, C. R. and Domingos, P. and Weld, D. S.},
  title     = {Relational {Markov} Models and their Application to Adaptive Web Navigation},
  booktitle = {Proceedings of the Eighth {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  location  = {Newport Beach, CA},
  pages     = {143--152},
  year      = 2002,
  interhash = {cccf0e8f8897dbb717bb6b5b11e6e820},
  intrahash = {c4dd1430db4e67a351bc2e22aa531f9d}
}