% Conference papers on support vector machines for text classification and
% web taxonomy integration.
%
% Fields outside the standard BibTeX set (file, interhash, intrahash,
% lastdatemodified, lastname, own, pdf, read) are ignored by the standard
% styles; they are carried over unchanged as bookkeeping data.
%
% NOTE(review): every `url` value below is a bare file name (e.g. zhang04.ps),
% not a resolvable URL -- confirm what these are meant to point at.
% NOTE(review): in the two Joachims entries `address` looks like the
% conference venue while `publisher` also embeds the publisher's city; left
% as-is so rendered output does not change -- confirm before normalising.

% Zhang and Lee, WWW '04: integrating a source taxonomy into a master
% taxonomy with SVMs, transductive learning and Cluster Shrinkage.
@inproceedings{zhang2004web,
  author           = {Zhang, Dell and Lee, Wee Sun},
  title            = {Web taxonomy integration using support vector machines},
  booktitle        = {WWW '04: Proceedings of the 13th international conference on World Wide Web},
  pages            = {472--481},
  publisher        = {ACM Press},
  address          = {New York, NY, USA},
  year             = {2004},
  abstract         = {We address the problem of integrating objects from a
                      source taxonomy into a master taxonomy. This problem is
                      not only currently pervasive on the web, but also
                      important to the emerging semantic web. A straightforward
                      approach to automating this process would be to train a
                      classifier for each category in the master taxonomy, and
                      then classify objects from the source taxonomy into these
                      categories. In this paper we attempt to use a powerful
                      classification method, Support Vector Machine (SVM), to
                      attack this problem. Our key insight is that the
                      availability of the source taxonomy data could be helpful
                      to build better classifiers in this scenario, therefore it
                      would be beneficial to do transductive learning rather
                      than inductive learning, i.e., learning to optimize
                      classification performance on a particular set of test
                      examples. Noticing that the categorizations of the master
                      and source taxonomies often have some semantic overlap, we
                      propose a method, Cluster Shrinkage (CS), to further
                      enhance the classification by exploiting such implicit
                      knowledge. Our experiments with real-world web data show
                      substantial improvements in the performance of taxonomy
                      integration.},
  url              = {zhang04.ps},
  pdf              = {zhang04.pdf},
  file             = {zhang2004web.pdf:zhang2004web.pdf:PDF},
  interhash        = {7edb6ba8814bb3382ffaeb009d5a3183},
  intrahash        = {b683a9061fe344cfbcd62aa85e44d2c4},
  lastdatemodified = {2005-08-07},
  lastname         = {Zhang},
  own              = {own},
  read             = {notread},
}

% Joachims, ICML-99: Transductive Support Vector Machines (TSVMs) for text
% classification. The abstract is stored as plain ASCII with whole words
% (no soft hyphens or line-break hyphenation inside words).
@inproceedings{joachims1999transductive,
  author           = {Joachims, Thorsten},
  title            = {Transductive Inference for Text Classification using Support Vector Machines},
  booktitle        = {Proceedings of {ICML}-99, 16th International Conference on Machine Learning},
  editor           = {Bratko, Ivan and Dzeroski, Saso},
  pages            = {200--209},
  publisher        = {Morgan Kaufmann Publishers, San Francisco, US},
  address          = {Bled, SL},
  year             = {1999},
  abstract         = {This paper introduces Transductive Support Vector
                      Machines (TSVMs) for text classification. While regular
                      Support Vector Machines (SVMs) try to induce a general
                      decision function for a learning task, Transductive
                      Support Vector Machines take into account a particular
                      test set and try to minimize misclassifications of just
                      those particular examples. The paper presents an analysis
                      of why TSVMs are well suited for text classification.
                      These theoretical findings are supported by experiments on
                      three test collections. The experiments show substantial
                      improvements over inductive methods, especially for small
                      training sets, cutting the number of labeled training
                      examples down to a twentieth on some tasks. This work also
                      proposes an algorithm for training TSVMs efficiently,
                      handling 10,000 examples and more.},
  url              = {joachims99.ps},
  pdf              = {joachims99.pdf},
  file             = {joachims1999transductive.pdf:joachims1999transductive.pdf:PDF},
  interhash        = {7b7737625402ab3e8db5f53d1de716d6},
  intrahash        = {7cf3e7981cac898c1745418db83e0fd6},
  lastdatemodified = {2005-08-06},
  lastname         = {Joachims},
  own              = {own},
  read             = {notread},
}

% Joachims, ECML-98: SVMs for learning text classifiers from examples.
% `series` is given together with `number` so that styles can print the
% proceedings as a numbered volume of its series (standard styles warn when
% a number has no series).
@inproceedings{joachims1998text,
  author           = {Joachims, Thorsten},
  title            = {Text categorization with support vector machines: learning with many relevant features},
  booktitle        = {Proceedings of {ECML}-98, 10th European Conference on Machine Learning},
  editor           = {N{\'{e}}dellec, Claire and Rouveirol, C{\'{e}}line},
  series           = {Lecture Notes in Computer Science},
  number           = {1398},
  pages            = {137--142},
  publisher        = {Springer Verlag, Heidelberg, DE},
  address          = {Chemnitz, DE},
  year             = {1998},
  abstract         = {This paper explores the use of Support Vector Machines
                      (SVMs) for learning text classifiers from examples. It
                      analyzes the particular properties of learning with text
                      data and identifies why SVMs are appropriate for this
                      task. Empirical results support the theoretical findings.
                      SVMs achieve substantial improvements over the currently
                      best performing methods and they behave robustly over a
                      variety of different learning tasks. Furthermore, they are
                      fully automatic, eliminating the need for manual parameter
                      tuning.},
  url              = {joachims98.ps},
  pdf              = {joachims98.pdf},
  interhash        = {997f731cfc4fdb02cb32eb88c4fab2e9},
  intrahash        = {d31522a42571f0adb834c5956540716e},
  lastdatemodified = {2005-08-06},
  lastname         = {Joachims},
  own              = {own},
  read             = {notread},
}