@inproceedings{kim2006text,
  abstract = {This paper presents a series of text-mining algorithms for managing knowledge directory, which is one of the most crucial problems in constructing knowledge management systems today. In future systems, the constructed directory, in which knowledge objects are automatically classified, should evolve so as to provide a good indexing service, as the knowledge collection grows or its usage changes. One challenging issue is how to combine manual and automatic organization facilities that enable a user to flexibly organize obtained knowledge by the hierarchical structure over time. To this end, I propose three algorithms that utilize text mining technologies: semi-supervised classification, semi-supervised clustering, and automatic directory building. Through experiments using controlled document collections, the proposed approach is shown to significantly support hierarchical organization of large electronic knowledge base with minimal human effort},
  address = {Berlin, Germany},
  author = {Kim, Han-Joon},
  booktitle = {Proceedings of the First International Conference on Knowledge Science, Engineering and Management (KSEM'06)},
  dateadded = {2006-09-30},
  doi = {10.1007/11811220_18},
  editor = {Lang, J. and Lin, F. and Wang, J.},
  file = {kim2006text.pdf:kim2006text.pdf:PDF},
  groups = {public},
  interhash = {cd7783b34b37d402830ac0f4477a3c44},
  intrahash = {babcd6f6b809e6727a8e7a0d188b4325},
  lastdatemodified = {2006-09-30},
  lastname = {Kim},
  month = aug,
  own = {notown},
  pages = {202--214},
  pdf = {kim06-text.pdf},
  publisher = {Springer},
  read = {notread},
  series = {Lecture Notes in Artificial Intelligence},
  timestamp = {2007-09-11 13:31:16},
  title = {On Text Mining Algorithms for Automated Maintenance of Hierarchical Knowledge Directory},
  url = {http://dx.doi.org/10.1007/11811220_18},
  username = {dbenz},
  volume = 4092,
  year = 2006
}

@article{bille2005survey,
  author = {Bille, Philip},
  file = {bille2005survey.pdf:bille2005survey.pdf:PDF},
  interhash = {9de6a5b4195fd08c1ff901c2c7a12e9d},
  intrahash = {5099765c5f638e6aeb096dd1d0a44eb7},
  journal = {Theoretical Computer Science},
  lastdatemodified = {2007-04-15},
  lastname = {Bille},
  number = {1-3},
  own = {notown},
  pages = {217--239},
  read = {notread},
  title = {A survey on tree edit distance and related problems},
  url = {http://dblp.uni-trier.de/db/journals/tcs/tcs337.html#Bille05},
  volume = 337,
  year = 2005
}

@inproceedings{ziegler2005improving,
  abstract = {In this work we present topic diversification, a novel method designed to balance and diversify personalized recommendation lists in order to reflect the user's complete spectrum of interests. Though being detrimental to average accuracy, we show that our method improves user satisfaction with recommendation lists, in particular for lists generated using the common item-based collaborative filtering algorithm. Our work builds upon prior research on recommender systems, looking at properties of recommendation lists as entities in their own right rather than specifically focusing on the accuracy of individual recommendations. We introduce the intra-list similarity metric to assess the topical diversity of recommendation lists and the topic diversification approach for decreasing the intra-list similarity. We evaluate our method using book recommendation data, including online analysis on 361,349 ratings and an online study involving more than 2,100 subjects.},
  address = {Chiba, Japan},
  author = {Ziegler, Cai-Nicolas and McNee, Sean and Konstan, Joseph and Lausen, Georg},
  booktitle = {Proceedings of the 14th International World Wide Web Conference},
  file = {ziegler2005improving.pdf:ziegler2005improving.pdf:PDF},
  interhash = {0a7f89e65c4a0a5e45aa69a54a5600e6},
  intrahash = {1c70855a788c17e3a94a7ecc00177f6c},
  lastdatemodified = {2006-09-30},
  lastname = {Ziegler},
  month = may,
  own = {notown},
  pdf = {null},
  publisher = {ACM Press},
  read = {notread},
  title = {Improving Recommendation Lists Through Topic Diversification},
  year = 2005
}

@article{ziegler2006computing,
  author = {Ziegler, Cai-Nicolas and Simon, Kai and Lausen, Georg},
  interhash = {74d9b7d218e52533d19ccacfd6d8e948},
  intrahash = {bd5647a471cc104c17726488c43fa7f3},
  journal = {Proceedings of the WWW2006},
  lastdatemodified = {2006-11-29},
  lastname = {Ziegler},
  note = {submitted},
  own = {notown},
  read = {notread},
  title = {Computing Semantic Proximity Between Concepts Using Taxonomic Knowledge},
  year = 2006
}

@inproceedings{yeh2006towards,
  abstract = {This paper discusses the automatic concept hierarchy generation process for specific knowledge network. Traditional concept hierarchy generation uses hierarchical clustering to group similar terms, and the result hierarchy is usually not satisfactory for human being recognition. Human-provided knowledge network presents strong semantic features, but this generation process is both labor-intensive and inconsistent under large scale hierarchy. The method proposed in this paper combines the results of specific knowledge network and automatic concept hierarchy generation, which produces a human-readable, semantic-oriented hierarchy. This generation process can efficiently reduce manual classification efforts, which is an exhausting task for human beings. An evaluation method is also proposed in this paper to verify the quality of the result hierarchy.},
  address = {Berlin / Heidelberg},
  author = {Yeh, Jian-Hua and Sie, Shun-Hong},
  booktitle = {Advances in Applied Artificial Intelligence},
  doi = {10.1007/11779568_105},
  file = {yeh2006towards.pdf:yeh2006towards.pdf:PDF},
  interhash = {fb72d42a46d53453f4809f23a11d10e8},
  intrahash = {6b560e955077ba6d790082d37059e14d},
  lastdatemodified = {2006-09-30},
  lastname = {Yeh},
  month = aug,
  own = {notown},
  pages = {982--989},
  pdf = {yeh06-towards.pdf},
  publisher = {Springer},
  read = {notread},
  series = {Lecture Notes in Computer Science},
  title = {Towards Automatic Concept Hierarchy Generation for Specific Knowledge Network},
  url = {http://dx.doi.org/10.1007/11779568_105},
  volume = 4031,
  year = 2006
}

@techreport{page1998pagerank,
  author = {Page, Lawrence and Brin, Sergey and Motwani, Rajeev and Winograd, Terry},
  file = {page1998pagerank.pdf:page1998pagerank.pdf:PDF},
  institution = {Stanford Digital Library Technologies Project},
  interhash = {ca10cf0b0dd668c64b1f378ff0775849},
  intrahash = {408c27df50e9c4a8680426758f63656f},
  lastdatemodified = {2006-08-24},
  lastname = {Page},
  own = {notown},
  pdf = {page98-pagerank.pdf},
  read = {notread},
  title = {The {PageRank} Citation Ranking: Bringing Order to the {Web}},
  url = {http://citeseer.ist.psu.edu/page98pagerank.html},
  year = 1998
}

@inproceedings{mani2004automatcally,
  abstract = {The emergence of vast quantities of on-line information has raised the importance of methods for automatic cataloguing of information in a variety of domains, including electronic commerce and bioinformatics. Ontologies can play a critical role in such cataloguing. In this paper, we describe a system that automatically induces an ontology from any large on-line text collection in a specific domain. The ontology that is induced consists of domain concepts, related by kind-of and part-of links. To achieve domain-independence, we use a combination of relatively shallow methods along with any available repositories of applicable background knowledge. We describe our evaluation experiences using these methods, and provide examples of induced structures.},
  address = {Geneva},
  author = {Mani, Inderjeet and Samuel, Ken and Concepcion, Kris and Vogel, David},
  booktitle = {Proceedings of the 3rd International Workshop on Computational Terminology},
  dateadded = {2006-07-18},
  interhash = {9ec83ddb1f251792d05345daa8357bf8},
  intrahash = {dce5c6301943fe6c9648cf671ceb167e},
  lastdatemodified = {2006-07-18},
  lastname = {Mani},
  month = aug,
  own = {notown},
  pdf = {mani04-automatically.pdf},
  read = {notread},
  title = {Automatically Inducing Ontologies from Corpora},
  url = {http://-new.biomath.jussieu.fr/~pz/computerm2004.html},
  year = 2004
}

@inproceedings{jones2005dont,
  address = {New York, NY, USA},
  author = {Jones, William and Phuwanartnurak, Ammy J. and Gill, Rajdeep and Bruce, Harry},
  booktitle = {CHI '05: CHI '05 extended abstracts on Human factors in computing systems},
  doi = {10.1145/1056808.1056952},
  file = {jones2005dont.pdf:jones2005dont.pdf:PDF},
  interhash = {c780239db9bec9fe3890e8ef76047811},
  intrahash = {a5e951e5eec1ac6794d65bee461e11ba},
  lastdatemodified = {2006-09-25},
  lastname = {Jones},
  own = {buy},
  pages = {1505--1508},
  pdf = {jones05-dont.pdf},
  publisher = {ACM Press},
  read = {any},
  title = {Don't take my folders away!: organizing personal information to get things done},
  url = {http://dx.doi.org/10.1145/1056808.1056952},
  year = 2005
}

@misc{erdmann2000from,
  abstract = {Semantic Annotation is a basic technology for intelligent content and is beneficial in a wide range of content-oriented intelligent applications. In this paper we present our work in ontology-based semantic annotation, which is embedded in a scenario of a knowledge portal application. Starting with seemingly good and bad manual semantic annotation, we describe our experiences made within the KA$^2$-initiative. The experiences gave us the starting point for developing an ergonomic and knowledge base-supported annotation tool. Furthermore, the annotation tool described are currently extended with mechanisms for semi-automatic information-extraction based annotation. Supporting the evolving nature of semantic content we additionally describe our idea of evolving ontologies supporting semantic annotation. 1 Introduction The KA$^2$-initiative (Knowledge Annotation initiative of the Knowledge Acquisition community) was launched at EKAW in 1997 in order to provide semantic access to inform...},
  author = {Erdmann, Michael and Maedche, Alexander and Schnurr, Hans-Peter and Staab, Steffen},
  day = {23},
  file = {erdmann2000from.pdf:erdmann2000from.pdf:PDF},
  interhash = {57d3bfc9b4d1fb72754c050790f8c3fd},
  intrahash = {19bb2756cd3da755a3a5238b01cd4d2b},
  lastdatemodified = {2006-07-13},
  lastname = {Erdmann},
  longnotes = {[[http://citeseer.ist.psu.edu/283251.html citeseer]]},
  month = jun,
  own = {own},
  pdf = {erdmann00-from.pdf},
  read = {readnext},
  title = {From Manual to Semi-automatic Semantic Annotation: About Ontology-based Text Annotation Tools},
  url = {http://citeseer.ist.psu.edu/283251.html},
  year = 2000
}

@inproceedings{cimiano2003automaticb,
  author = {Cimiano, Philipp and Staab, Steffen and Tane, Julien},
  booktitle = {Proceedings of the ECML/PKDD Workshop on Adaptive Text Extraction and Mining, Cavtat-Dubrovnik, Croatia},
  interhash = {2f9df79fa0d890faa91dc1d0d0def735},
  intrahash = {c62b4e1dc65490d68bef7eaed01f83ea},
  lastdatemodified = {2007-03-22},
  lastname = {Cimiano},
  own = {notown},
  pages = {10--17},
  pdf = {cimiano03-automatic.pdf},
  read = {notread},
  title = {Automatic Acquisition of Taxonomies from Text: {FCA} meets {NLP}},
  url = {http://www.aifb.uni-karlsruhe.de/WBS/pci/ontolearning.pdf},
  year = 2003
}

@article{borgatti1990ls,
  abstract = {Seidman (1983a) has suggested that the engineering concept of LS sets provides a good formalization of the intuitive network notion of a cohesive subset. Some desirable features that LS sets exhibit are that they are difficult to disconnect by removing edges, they are relatively dense within and isolated without, they have limited diameter, and individual members have more direct links to other members than to non-members. Unfortunately, this plethora of features means that LS sets occur only rarely in real data. It also means that they do not make good independent variables for structural analyses in which greater-than-expected in-group homogeneity is hypothesized with respect to some substantive dependent variable, because it is unclear which aspect of the LS set was responsible for the observed homogeneity. We discuss a variety of generalizations and relations of LS sets based on just a few of the properties possessed by LS sets. Some of these simpler models are drawn from the literature while others are introduced in this paper. One of the generalizations we introduce, called a lambda set, is based on the property that members of the set have greater edge connectivity with other members than with non-members. This property is shared by LS sets. Edge connectivity satisfies the axioms of an ultrametric similarity measure, and so LS sets and lambda sets are shown to correspond to a particular hierarchical clustering of the nodes in a network. Lambda sets are straightforward to compute, and we have made use of this fact to introduce a new algorithm for computing LS sets which runs an order of magnitude faster than the previous alternative.},
  author = {Borgatti, S. P. and Everett, M. G. and Shirey, P. R.},
  file = {borgatti1990ls.pdf:borgatti1990ls.pdf:PDF},
  interhash = {f33aeeed4069edb28d4eec6e62b0b4a9},
  intrahash = {831a1b217a76e0d87b040344da1b6a12},
  journal = {Social Networks},
  lastdatemodified = {2006-12-04},
  lastname = {Borgatti},
  number = 4,
  own = {notown},
  pages = {337--358},
  pdf = {borgatti90-ls.pdf},
  read = {notread},
  title = {{LS} sets, lambda sets, and other cohesive subsets},
  volume = 12,
  year = 1990
}

% NOTE(review): `2006text` describes the same paper as `kim2006text`
% (identical interhash/intrahash, title, pages and URL). It is kept under
% its own key so that existing citations of `2006text` still resolve.
@inproceedings{2006text,
  abstract = {This paper presents a series of text-mining algorithms for managing knowledge directory, which is one of the most crucial problems in constructing knowledge management systems today. In future systems, the constructed directory, in which knowledge objects are automatically classified, should evolve so as to provide a good indexing service, as the knowledge collection grows or its usage changes. One challenging issue is how to combine manual and automatic organization facilities that enable a user to flexibly organize obtained knowledge by the hierarchical structure over time. To this end, I propose three algorithms that utilize text mining technologies: semi-supervised classification, semi-supervised clustering, and automatic directory building. Through experiments using controlled document collections, the proposed approach is shown to significantly support hierarchical organization of large electronic knowledge base with minimal human effort},
  address = {Berlin, Germany},
  author = {Kim, Han-Joon},
  booktitle = {Proceedings of the First International Conference on Knowledge Science, Engineering and Management (KSEM'06)},
  dateadded = {2006-09-30},
  doi = {10.1007/11811220_18},
  editor = {Lang, J. and Lin, F. and Wang, J.},
  interhash = {cd7783b34b37d402830ac0f4477a3c44},
  intrahash = {babcd6f6b809e6727a8e7a0d188b4325},
  lastdatemodified = {2006-09-30},
  lastname = {Kim},
  month = aug,
  own = {notown},
  pages = {202--214},
  pdf = {kim06-text.pdf},
  publisher = {Springer},
  read = {notread},
  series = {Lecture Notes in Artificial Intelligence},
  title = {On Text Mining Algorithms for Automated Maintenance of Hierarchical Knowledge Directory},
  url = {http://dx.doi.org/10.1007/11811220_18},
  volume = 4092,
  year = 2006
}

@inproceedings{zhang2004web,
  abstract = {We address the problem of integrating objects from a source taxonomy into a master taxonomy. This problem is not only currently pervasive on the web, but also important to the emerging semantic web. A straightforward approach to automating this process would be to train a classifier for each category in the master taxonomy, and then classify objects from the source taxonomy into these categories. In this paper we attempt to use a powerful classification method, Support Vector Machine (SVM), to attack this problem. Our key insight is that the availability of the source taxonomy data could be helpful to build better classifiers in this scenario, therefore it would be beneficial to do transductive learning rather than inductive learning, i.e., learning to optimize classification performance on a particular set of test examples. Noticing that the categorizations of the master and source taxonomies often have some semantic overlap, we propose a method, Cluster Shrinkage (CS), to further enhance the classification by exploiting such implicit knowledge. Our experiments with real-world web data show substantial improvements in the performance of taxonomy integration.},
  address = {New York, NY, USA},
  author = {Zhang, Dell and Lee, Wee Sun},
  booktitle = {WWW '04: Proceedings of the 13th international conference on World Wide Web},
  file = {zhang2004web.pdf:zhang2004web.pdf:PDF},
  interhash = {7edb6ba8814bb3382ffaeb009d5a3183},
  intrahash = {b683a9061fe344cfbcd62aa85e44d2c4},
  lastdatemodified = {2005-08-07},
  lastname = {Zhang},
  own = {own},
  pages = {472--481},
  pdf = {zhang04.pdf},
  publisher = {ACM Press},
  read = {notread},
  title = {Web taxonomy integration using support vector machines},
  url = {zhang04.ps},
  year = 2004
}

@misc{newman2003structure,
  abstract = {Inspired by empirical studies of networked systems such as the Internet, social networks, and biological networks, researchers have in recent years developed a variety of techniques and models to help us understand or predict the behavior of these systems. Here we review developments in this field, including such concepts as the small-world effect, degree distributions, clustering, network correlations, random graph models, models of network growth and preferential attachment, and dynamical processes taking place on networks.},
  archiveprefix = {arXiv},
  author = {Newman, M. E. J.},
  eprint = {cond-mat/0303516},
  file = {newman2003structure.pdf:newman2003structure.pdf:PDF},
  interhash = {7bedd01cb4c06af9f5200b0fb3faa571},
  intrahash = {d53568209eef08fb0a8734cf34c59a71},
  lastdatemodified = {2006-10-07},
  lastname = {Newman},
  month = mar,
  own = {notown},
  pdf = {newman03-structure.pdf},
  read = {notread},
  title = {The structure and function of complex networks},
  url = {http://arxiv.org/abs/cond-mat/0303516},
  year = 2003
}

@inproceedings{joachims1999transductive,
  abstract = {This paper introduces Transductive Support Vector Machines (TSVMs) for text classification. While regular Support Vector Machines (SVMs) try to induce a general decision function for a learning task, Transductive Support Vector Machines take into account a particular test set and try to minimize misclassifications of just those particular examples. The paper presents an analysis of why TSVMs are well suited for text classification. These theoretical findings are supported by experiments on three test collections. The experiments show substantial improvements over inductive methods, especially for small training sets, cutting the number of labeled training examples down to a twentieth on some tasks. This work also proposes an algorithm for training TSVMs efficiently, handling 10,000 examples and more.},
  address = {Bled, SI},
  author = {Joachims, Thorsten},
  booktitle = {Proceedings of {ICML}-99, 16th International Conference on Machine Learning},
  editor = {Bratko, Ivan and Dzeroski, Saso},
  file = {joachims1999transductive.pdf:joachims1999transductive.pdf:PDF},
  interhash = {7b7737625402ab3e8db5f53d1de716d6},
  intrahash = {7cf3e7981cac898c1745418db83e0fd6},
  lastdatemodified = {2005-08-06},
  lastname = {Joachims},
  own = {own},
  pages = {200--209},
  pdf = {joachims99.pdf},
  publisher = {Morgan Kaufmann Publishers, San Francisco, US},
  read = {notread},
  title = {Transductive Inference for Text Classification using Support Vector Machines},
  url = {joachims99.ps},
  year = 1999
}

@inproceedings{joachims1998text,
  abstract = {This paper explores the use of Support Vector Machines (SVMs) for learning text classifiers from examples. It analyzes the particular properties of learning with text data and identifies, why SVMs are appropriate for this task. Empirical results support the theoretical findings. SVMs achieve substantial improvements over the currently best performing methods and they behave robustly over a variety of different learning tasks. Furthermore, they are fully automatic, eliminating the need for manual parameter tuning.},
  address = {Chemnitz, DE},
  author = {Joachims, Thorsten},
  booktitle = {Proceedings of {ECML}-98, 10th European Conference on Machine Learning},
  editor = {N{\'{e}}dellec, Claire and Rouveirol, C{\'{e}}line},
  interhash = {997f731cfc4fdb02cb32eb88c4fab2e9},
  intrahash = {d31522a42571f0adb834c5956540716e},
  lastdatemodified = {2005-08-06},
  lastname = {Joachims},
  own = {own},
  pages = {137--142},
  pdf = {joachims98.pdf},
  publisher = {Springer Verlag, Heidelberg, DE},
  read = {notread},
  series = {Lecture Notes in Computer Science},
  title = {Text categorization with support vector machines: learning with many relevant features},
  url = {joachims98.ps},
  volume = 1398,
  year = 1998
}

@article{holsapple2002collaborative,
  author = {Holsapple, Clyde W. and Joshi, K. D.},
  file = {holsapple2002collaborative.pdf:holsapple2002collaborative.pdf:PDF},
  interhash = {7f11603c8777130c2bbd10da102a86f3},
  intrahash = {09e47889685c9ba0567d72d2227cc265},
  journal = {Communications of the ACM},
  lastdatemodified = {2006-12-04},
  lastname = {Holsapple},
  number = 2,
  own = {buy},
  pages = {42--47},
  pdf = {holsapple02-collaborative.pdf},
  read = {notread},
  title = {A collaborative approach to ontology design},
  volume = 45,
  year = 2002
}

@techreport{hofmann1998statistical,
  abstract = {Modeling and predicting co-occurrences of events is a fundamental problem of unsupervised learning. In this contribution we develop a statistical framework for analyzing co-occurrence data in a general setting where elementary observations are joint occurrences of pairs of abstract objects from two finite sets. The main challenge for statistical models in this context is to overcome the inherent data sparseness and to estimate the probabilities for pairs which were rarely observed or even unobserved in a given sample set. Moreover, it is often of considerable interest to extract grouping structure or to find a hierarchical data organization. A novel family of mixture models is proposed which explain the observed data by a finite number of shared aspects or clusters. This provides a common framework for statistical inference and structure discovery and also includes several recently proposed models as special cases. Adopting the maximum likelihood principle, EM algorithms are derived to fit the model parameters. We develop improved versions of EM which largely avoid overfitting problems and overcome the inherent locality of EM--based optimization. Among the broad variety of possible applications, e.g., in information retrieval, natural language processing, data mining, and computer vision, we have chosen document retrieval, the statistical analysis of noun/adjective co-occurrence and the unsupervised segmentation of textured images to test and evaluate the proposed algorithms.},
  address = {Cambridge, MA, USA},
  author = {Hofmann, Thomas and Puzicha, Jan},
  file = {hofmann1998statistical.pdf:hofmann1998statistical.pdf:PDF},
  institution = {Massachusetts Institute of Technology},
  interhash = {f4d76aef2c16c571ee23fc04aac781c2},
  intrahash = {edca714d2bc0060e6f3e4e7e67df690b},
  lastdatemodified = {2007-03-13},
  lastname = {Hofmann},
  month = feb,
  own = {notown},
  pdf = {hofmann98_statistical.pdf},
  read = {notread},
  title = {Statistical Models for Co-occurrence Data},
  year = 1998
}

@mastersthesis{herold2005collaborative,
  abstract = {Ontologies specify the knowledge about a domain of interest using formal semantics, for example the products offered by an online-shop as well as a taxonomy of product categories organizing the products for better browsing and searching or an information portal like a digital library organizing books in a hierarchical category system. Personalized semantic applications allow users to have a personal copy of the ontology and to tailor it to their needs: e.g., they can select to see only a subset of all the categories available, merge existing categories, or introduce completely new categories. In large information portals one can try to support users in maintaining their personal ontology by recommending them changes based on ontologies of other users. For example, if a user has many articles about "Java" and "C++" in his personal bibliography but all of them in a common category "programming languages", but other users have a better organization using two subcategories for "Java" and "C++", respectively, one would like to recommend the user to add these two subcategories and assign the papers accordingly. Methods from Machine Learning, especially from Recommender Systems and Collaborative Filtering can be applied to learn such recommendations. The task of this topic is, to implement different strategies for recommending categories, their super- and subcategories and the assignment of products to the categories: very simple strategies that take into account other users' ontologies only in a summary way, e.g., always recommend the most often used concept first, as well as personalized recommenders based on collaborative filtering methods. The strategies should be evaluated on several synthetic datasets.},
  address = {Freiburg, Germany},
  author = {Herold, Magnus},
  dateadded = {2006-09-02},
  file = {herold2005collaborative.pdf:herold2005collaborative.pdf:PDF},
  interhash = {a484d48faa89d3d52786269fb72c0ff3},
  intrahash = {2cb505929e5006d4438cc595b988384b},
  lastdatemodified = {2006-09-02},
  lastname = {Herold},
  month = dec,
  own = {notown},
  pdf = {herold05-collaborative.pdf},
  read = {notread},
  school = {Albert-Ludwigs-University},
  title = {Collaborative Personal Ontology Evolution},
  type = {Diploma Thesis},
  year = 2005
}

@manual{hanneman2005introduction,
  abstract = {This on-line textbook introduces many of the basics of formal approaches to the analysis of social networks. The text relies heavily on the work of Freeman, Borgatti, and Everett (the authors of the UCINET software package). The materials here, and their organization, were also very strongly influenced by the text of Wasserman and Faust, and by a graduate seminar conducted by Professor Phillip Bonacich at UCLA. Many other users have also made very helpful comments and suggestions based on the first version. Errors and omissions, of course, are the responsibility of the authors},
  address = {Riverside, CA},
  author = {Hanneman, Robert A. and Riddle, Mark},
  file = {hanneman2005introduction.pdf:hanneman2005introduction.pdf:PDF},
  interhash = {e7a9585889a0f228867e7e464647daae},
  intrahash = {9c01af7f711a7e9b6513a77f32ea11e3},
  lastdatemodified = {2006-12-04},
  lastname = {Hanneman},
  organization = {University of California, Riverside},
  own = {notown},
  pdf = {hanneman05-introduction.pdf},
  read = {notread},
  title = {Introduction to social network methods},
  url = {http://www.faculty.ucr.edu/~hanneman/},
  year = 2005
}