@article{jorge2009margin,
  abstract = {From a multi-class learning task, in addition to a classifier, it is possible to infer some useful knowledge about the relationship between the classes involved. In this paper we propose a method to learn a hierarchical clustering of the set of classes. The usefulness of such clusterings has been exploited in bio-medical applications to find out relations between diseases or populations of animals. The method proposed here defines a distance between classes based on the margin maximization principle, and then builds the hierarchy using a linkage procedure. Moreover, to quantify the goodness of the hierarchies we define a measure. Finally, we present a set of experiments comparing the scores achieved by our approach with other methods.},
  author = {Díez, Jorge and del Coz, Juan and Bahamonde, Antonio and Luaces, Oscar},
  interhash = {634c6107fd84fc2f0b17bf1559436a89},
  intrahash = {fa3f2f8d6a9103c72fd8a32ca0d1e247},
  journal = {Machine Learning and Knowledge Discovery in Databases},
  pages = {302--314},
  title = {Soft Margin Trees},
  url = {http://dx.doi.org/10.1007/978-3-642-04180-8_37},
  year = 2009
}

@article{kelvin2009multiattribute,
  abstract = {In many real-world applications that analyze correlations between two groups of diverse entities, each group of entities can be characterized by multiple attributes. As such, there is a need to co-cluster multiple attributes’ values into pairs of highly correlated clusters. We denote this co-clustering problem as the multi-attribute co-clustering problem. In this paper, we introduce a generalization of the mutual information between two attributes into mutual information between two attribute sets. The generalized formula enables us to use correlation information to discover multi-attribute co-clusters (MACs). We develop a novel algorithm MACminer to mine MACs with high correlation information from datasets. We demonstrate the mining efficiency of MACminer in datasets with multiple attributes, and show that MACs with high correlation information have higher classification and predictive power, as compared to MACs generated by alternative high-dimensional data clustering and pattern mining techniques.},
  author = {Sim, Kelvin and Gopalkrishnan, Vivekanand and Chua, Hon and Ng, See-Kiong},
  interhash = {5d3a6eff6a13dc171f20316f3a9670ce},
  intrahash = {bbe4260fc922a77c79555f8e9b8120bc},
  journal = {Machine Learning and Knowledge Discovery in Databases},
  pages = {398--413},
  title = {MACs: Multi-Attribute Co-clusters with High Correlation Information},
  url = {http://dx.doi.org/10.1007/978-3-642-04174-7_26},
  year = 2009
}

@article{nguyen2009efficient,
  abstract = {Outlier detection finds many applications, especially in domains that have scope for abnormal behavior. In this paper, we present a new technique for detecting distance-based outliers, aimed at reducing execution time associated with the detection process. Our approach operates in two phases and employs three pruning rules. In the first phase, we partition the data into clusters, and make an early estimate on the lower bound of outlier scores. Based on this lower bound, the second phase then processes relevant clusters using the traditional block nested-loop algorithm. Here two efficient pruning rules are utilized to quickly discard more non-outliers and reduce the search space. Detailed analysis of our approach shows that the additional overhead of the first phase is offset by the reduction in cost of the second phase. We also demonstrate the superiority of our approach over existing distance-based outlier detection methods by extensive empirical studies on real datasets.},
  author = {Vu, Nguyen and Gopalkrishnan, Vivekanand},
  interhash = {e219b7e66b466cc39f44520b37f91a61},
  intrahash = {b33d7b9133cc3d81e507f4366658fb56},
  journal = {Machine Learning and Knowledge Discovery in Databases},
  pages = {160--175},
  title = {Efficient Pruning Schemes for Distance-Based Outlier Detection},
  url = {http://dx.doi.org/10.1007/978-3-642-04174-7_11},
  year = 2009
}

@article{pu2009latent,
  abstract = {Co-clustering has emerged as an important technique for mining contingency data matrices. However, almost all existing co-clustering algorithms are hard partitioning, assigning each row and column of the data matrix to one cluster. Recently a Bayesian co-clustering approach has been proposed which allows a probability distribution membership in row and column clusters. The approach uses variational inference for parameter estimation. In this work, we modify the Bayesian co-clustering model, and use collapsed Gibbs sampling and collapsed variational inference for parameter estimation. Our empirical evaluation on real data sets shows that both collapsed Gibbs sampling and collapsed variational inference are able to find more accurate likelihood estimates than the standard variational Bayesian co-clustering approach.},
  author = {Wang, Pu and Domeniconi, Carlotta and Laskey, Kathryn},
  interhash = {ca3c6ea6255fd4fa4601502fd55bec24},
  intrahash = {0ef1833cdcdf2a7d9093e37894c4f3ab},
  journal = {Machine Learning and Knowledge Discovery in Databases},
  pages = {522--537},
  title = {Latent Dirichlet Bayesian Co-Clustering},
  url = {http://dx.doi.org/10.1007/978-3-642-04174-7_34},
  year = 2009
}
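Note on jorge2009margin: a minimal sketch of the pipeline the abstract describes, in Python: derive a pairwise distance between classes from a maximum-margin separator, then build the class hierarchy with a standard linkage procedure. Using the margin 2/||w|| of a one-vs-one linear SVM as the inter-class distance is an assumption made for illustration, not the paper's exact definition.

import numpy as np
from itertools import combinations
from sklearn.svm import LinearSVC
from scipy.spatial.distance import squareform
from scipy.cluster.hierarchy import linkage

def class_distance_matrix(X, y):
    # Inter-class distances: train a linear SVM per class pair and use its
    # margin (2 / ||w||) as the distance, so that classes separated by a
    # wide margin count as far apart. (Illustrative stand-in for the
    # paper's margin-based distance.)
    classes = np.unique(y)
    k = len(classes)
    D = np.zeros((k, k))
    for (i, a), (j, b) in combinations(enumerate(classes), 2):
        mask = (y == a) | (y == b)
        svm = LinearSVC(C=1.0).fit(X[mask], y[mask])
        D[i, j] = D[j, i] = 2.0 / np.linalg.norm(svm.coef_)
    return classes, D

# Building the hierarchy over classes with average linkage:
#   classes, D = class_distance_matrix(X, y)
#   Z = linkage(squareform(D), method="average")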
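Note on kelvin2009multiattribute: the abstract hinges on generalizing mutual information from two attributes to two attribute sets. One standard way to do this, assuming discrete attributes, is to treat the joint value of each set as a single compound symbol and compute the empirical mutual information between the compounds; the paper's exact formula may differ from this sketch.

import math
from collections import Counter

def set_mutual_information(rows, set_a, set_b):
    # rows: list of dicts mapping attribute name -> discrete value.
    # set_a, set_b: disjoint lists of attribute names.
    n = len(rows)
    pa = Counter(tuple(r[k] for k in set_a) for r in rows)
    pb = Counter(tuple(r[k] for k in set_b) for r in rows)
    pab = Counter((tuple(r[k] for k in set_a),
                   tuple(r[k] for k in set_b)) for r in rows)
    # I(A; B) = sum over (a, b) of p(a,b) * log[ p(a,b) / (p(a) p(b)) ], in nats.
    return sum((c / n) * math.log((c / n) / ((pa[va] / n) * (pb[vb] / n)))
               for (va, vb), c in pab.items())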
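Note on nguyen2009efficient: the abstract outlines a two-phase scheme, clustering first to bound outlier scores cheaply, then pruning before the expensive pass over the data. The sketch below illustrates the idea with the k-NN-distance outlier score; the clustering step, the specific bound, and all parameters are illustrative assumptions, and the paper's three pruning rules are not reproduced.

import numpy as np
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors

def top_n_outliers(X, k=5, n=10, n_clusters=8):
    # Score = distance to the k-th nearest neighbor (larger = more outlying).
    # Phase 1: restrict neighbor search to each point's own cluster; this can
    # only overestimate the true k-NN distance, giving an upper bound ub.
    labels = KMeans(n_clusters=n_clusters, n_init=10).fit_predict(X)
    ub = np.empty(len(X))
    for c in np.unique(labels):
        idx = np.where(labels == c)[0]
        if len(idx) <= k:
            ub[idx] = np.inf  # cluster too small to bound; never prune these
            continue
        d, _ = NearestNeighbors(n_neighbors=k + 1).fit(X[idx]).kneighbors(X[idx])
        ub[idx] = d[:, -1]    # column 0 is the point itself, hence k + 1
    nn_all = NearestNeighbors(n_neighbors=k + 1).fit(X)

    def score(pts):           # exact k-NN distance over the full dataset
        return nn_all.kneighbors(X[pts])[0][:, -1]

    # Cutoff: the smallest of any n exact scores is a valid lower bound on
    # the n-th largest score overall, so points with ub below it are pruned.
    seed = np.argsort(ub)[-n:]
    cutoff = score(seed).min()
    survivors = np.where(ub >= cutoff)[0]
    # Phase 2: exact scoring of the survivors only.
    s = score(survivors)
    return survivors[np.argsort(s)[::-1][:n]]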
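Note on pu2009latent: the abstract contrasts collapsed Gibbs sampling with variational inference for a Bayesian co-clustering model. The skeleton below shows the generic collapsed-Gibbs pattern for a simplified model in which each observed cell of a discrete matrix carries its own (row-cluster, column-cluster) pair under Dirichlet-multinomial priors; the model, the conditional, and the hyperparameters are assumptions for illustration and do not reproduce the paper's derivation.

import numpy as np

def collapsed_gibbs_cocluster(entries, n_rows, n_cols, K, L, V,
                              alpha=1.0, beta=1.0, gamma=1.0,
                              n_iter=200, seed=0):
    # entries: list of (row u, column v, discrete value x in 0..V-1).
    # K, L: number of row / column clusters; V: number of distinct values.
    rng = np.random.default_rng(seed)
    N = len(entries)
    zi = rng.integers(K, size=N)   # row-cluster per observed cell
    zj = rng.integers(L, size=N)   # column-cluster per observed cell
    n_ui = np.zeros((n_rows, K))   # row u's cells assigned to row-cluster i
    n_vj = np.zeros((n_cols, L))   # column v's cells in column-cluster j
    n_ijx = np.zeros((K, L, V))    # value counts per co-cluster
    n_ij = np.zeros((K, L))        # total counts per co-cluster
    for e, (u, v, x) in enumerate(entries):
        n_ui[u, zi[e]] += 1
        n_vj[v, zj[e]] += 1
        n_ijx[zi[e], zj[e], x] += 1
        n_ij[zi[e], zj[e]] += 1
    for _ in range(n_iter):
        for e, (u, v, x) in enumerate(entries):
            i, j = zi[e], zj[e]    # remove this cell's counts
            n_ui[u, i] -= 1
            n_vj[v, j] -= 1
            n_ijx[i, j, x] -= 1
            n_ij[i, j] -= 1
            # Collapsed conditional over all (i, j) pairs at once.
            p = ((n_ui[u][:, None] + alpha) * (n_vj[v][None, :] + beta)
                 * (n_ijx[:, :, x] + gamma) / (n_ij + V * gamma))
            p = p.ravel()
            p /= p.sum()
            i, j = divmod(rng.choice(K * L, p=p), L)
            zi[e], zj[e] = i, j    # restore counts at the new assignment
            n_ui[u, i] += 1
            n_vj[v, j] += 1
            n_ijx[i, j, x] += 1
            n_ij[i, j] += 1
    return zi, zj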