% Bibliography database: clustering of multi-type (tripartite / multi-way)
% data, closed pattern mining in n-ary relations, hybrid recommendation,
% and three-way factor models.
%
% Conventions used in this file:
%   - one field per line, fields in alphabetical order, "=" aligned per entry;
%   - names in "Last, First" form (suffixes as "Last, Jr., First");
%   - month given as the unquoted three-letter macro (e.g. apr, aug);
%   - only whole words are brace-protected in titles;
%   - "address" holds the publisher city, "location" the conference venue
%     (NOTE(review): popescul01probabilistic still stores the venue in
%     "address" -- confirm before moving it);
%   - "interhash" / "intrahash" are not standard BibTeX fields and are
%     ignored by standard styles (presumably export identifiers -- verify).
% The file is UTF-8 encoded; non-ASCII characters are written directly.

% Conference poster: tripartite clustering on social tagging data.
@inproceedings{lu2009exploit,
  abstract  = {In this poster, we investigate how to enhance web clustering by leveraging the tripartite network of social tagging systems. We propose a clustering method, called "Tripartite Clustering", which cluster the three types of nodes (resources, users and tags) simultaneously based on the links in the social tagging network. The proposed method is experimented on a real-world social tagging dataset sampled from del.icio.us. We also compare the proposed clustering approach with K-means. All the clustering results are evaluated against a human-maintained web directory. The experimental results show that Tripartite Clustering significantly outperforms the content-based K-means approach and achieves performance close to that of social annotation-based K-means whereas generating much more useful information.},
  address   = {New York, NY, USA},
  author    = {Lu, Caimei and Chen, Xin and Park, E. K.},
  booktitle = {CIKM '09: Proceeding of the 18th ACM conference on Information and knowledge management},
  doi       = {10.1145/1645953.1646167},
  interhash = {e192e53972f28d78f1ecbffbfea08bed},
  intrahash = {a120cece36e15b12321c87e7d0938d73},
  isbn      = {978-1-60558-512-3},
  location  = {Hong Kong, China},
  pages     = {1545--1548},
  publisher = {ACM},
  title     = {Exploit the tripartite network of social tagging for web clustering},
  url       = {http://portal.acm.org/citation.cfm?id=1646167},
  year      = 2009,
}

% Journal article: Data-Peeler, closed n-set mining in n-ary relations.
@article{cerf2009closed,
  abstract  = {Set pattern discovery from binary relations has been extensively studied during the last decade. In particular, many complete and efficient algorithms for frequent closed set mining are now available. Generalizing such a task to n-ary relations (n ≥ 2) appears as a timely challenge. It may be important for many applications, for example, when adding the time dimension to the popular objects × features binary case. The generality of the task (no assumption being made on the relation arity or on the size of its attribute domains) makes it computationally challenging. We introduce an algorithm called Data-Peeler. From an n-ary relation, it extracts all closed n-sets satisfying given piecewise (anti) monotonic constraints. This new class of constraints generalizes both monotonic and antimonotonic constraints. Considering the special case of ternary relations, Data-Peeler outperforms the state-of-the-art algorithms CubeMiner and Trias by orders of magnitude. These good performances must be granted to a new clever enumeration strategy allowing to efficiently enforce the closeness property. The relevance of the extracted closed n-sets is assessed on real-life 3-and 4-ary relations. Beyond natural 3-or 4-ary relations, expanding a relation with an additional attribute can help in enforcing rather abstract constraints such as the robustness with respect to binarization. Furthermore, a collection of closed n-sets is shown to be an excellent starting point to compute a tiling of the dataset.},
  address   = {New York, NY, USA},
  author    = {Cerf, Loïc and Besson, Jérémy and Robardet, Céline and Boulicaut, Jean-François},
  doi       = {10.1145/1497577.1497580},
  interhash = {b67a66b57f8a6c61a9099bc1c3b407a9},
  intrahash = {4685da7a801c274eb4e51693ca5a2adc},
  issn      = {1556-4681},
  journal   = {ACM Transactions on Knowledge Discovery from Data},
  number    = 1,
  pages     = {1--36},
  publisher = {ACM},
  title     = {Closed patterns meet n-ary relations},
  volume    = 3,
  year      = 2009,
}

% Conference paper: Data-Peeler (same authors as cerf2009closed).
@inproceedings{cerf2008datapeeler,
  abstract  = {Set pattern discovery from binary relations has been extensively studied during the last decade. In particular, many complete and efficient algorithms which extract frequent closed sets are now available. Generalizing such a task to n-ary relations (n ≥ 2) appears as a timely challenge. It may be important for many applications, e.g., when adding the time dimension to the popular objects × features binary case. The generality of the task — no assumption being made on the relation arity or on the size of its attribute domains — makes it computationally challenging. We introduce an algorithm called Data-Peeler. From a n-ary relation, it extracts all closed n-sets satisfying given piecewise (anti)-monotonic constraints. This new class of constraints generalizes both monotonic and anti-monotonic constraints. Considering the special case of ternary relations, Data-Peeler outperforms the state-of-the-art algorithms CubeMiner and Trias by orders of magnitude. These good performances must be granted to a new clever enumeration strategy allowing an efficient closeness checking. An original application on a real-life 4-ary relation is used to assess the relevancy of closed n-sets constraint-based mining.},
  author    = {Cerf, Loïc and Besson, Jérémy and Robardet, Céline and Boulicaut, Jean-François},
  booktitle = {Proc. SIAM International Conference on Data Mining SDM'08},
  interhash = {ec08ad149182185a9f2348f9b1e351c0},
  intrahash = {53172260ef12c237ecf4d032a97e0434},
  month     = apr,
  pages     = {37--48},
  title     = {{Data-Peeler}: Constraint-based Closed Pattern Mining in n-ary Relations},
  url       = {http://www.siam.org/proceedings/datamining/2008/dm08_04_Cerf.pdf},
  year      = 2008,
}

% Conference paper: multi-way distributional clustering (MDC).
@inproceedings{bekkerman2005multiway,
  abstract  = {We present a novel unsupervised learning scheme that simultaneously clusters variables of several types (e.g., documents, words and authors) based on pairwise interactions between the types, as observed in co-occurrence data. In this scheme, multiple clustering systems are generated aiming at maximizing an objective function that measures multiple pairwise mutual information between cluster variables. To implement this idea, we propose an algorithm that interleaves top-down clustering of some variables and bottom-up clustering of the other variables, with a local optimization correction routine. Focusing on document clustering we present an extensive empirical study of two-way, three-way and four-way applications of our scheme using six real-world datasets including the 20 News-groups (20NG) and the Enron email collection. Our multi-way distributional clustering (MDC) algorithms consistently and significantly outperform previous state-of-the-art information theoretic clustering algorithms.},
  address   = {New York, NY, USA},
  author    = {Bekkerman, Ron and El-Yaniv, Ran and McCallum, Andrew},
  booktitle = {ICML '05: Proceedings of the 22nd International Conference on Machine learning},
  doi       = {10.1145/1102351.1102357},
  interhash = {25609f84a6916c1664e61d8618f46a32},
  intrahash = {2921f89f8663e7bcc122a2a77c66e7c2},
  isbn      = {1-59593-180-5},
  location  = {Bonn, Germany},
  pages     = {41--48},
  publisher = {ACM},
  title     = {Multi-way distributional clustering via pairwise interactions},
  url       = {http://portal.acm.org/citation.cfm?id=1102351.1102357},
  year      = 2005,
}

% Conference paper: probabilistic hybrid recommender model.
% The abstract is truncated ("...") in the source record.
@inproceedings{popescul01probabilistic,
  abstract  = {Recommender systems leverage product and community information to target products to consumers. Researchers have developed collaborative recommenders, content-based recommenders, and a few hybrid systems. We propose a unified probabilistic framework for merging collaborative and content-based recommendations. We extend Hofmann's aspect model to incorporate three-way co-occurrence data among users, items, and item content. The relative influence of collaboration data versus content data is not...},
  address   = {Seattle, Washington},
  author    = {Popescul, Alexandrin and Ungar, Lyle and Pennock, David and Lawrence, Steve},
  booktitle = {17th Conference on Uncertainty in Artificial Intelligence},
  interhash = {429bcf0381d2b7b9ab95eea7d3a65776},
  intrahash = {ae7ce7b8d1a31e81f9aa8b8367039506},
  month     = aug,
  pages     = {437--444},
  title     = {Probabilistic Models for Unified Collaborative and Content-Based Recommendation in Sparse-Data Environments},
  url       = {http://citeseer.ist.psu.edu/popescul01probabilistic.html},
  year      = 2001,
}

% Book chapter: the PARAFAC three-way model.
@incollection{hl84parafac,
  address   = {New York},
  author    = {Harshman, R. A. and Lundy, M. E.},
  booktitle = {Research methods for multimode data analysis},
  editor    = {Law, H. G. and Snyder, Jr., C. W. and Hattie, J. A. and McDonald, R. P.},
  interhash = {2e24bc87fee86cbc73811b7853680312},
  intrahash = {0a93b832524031bbfe46db5406b99574},
  pages     = {122--215},
  publisher = {Praeger},
  title     = {The {PARAFAC} model for three-way factor analysis and multidimensional scaling},
  year      = 1984,
}