% Journal article (Data Mining and Knowledge Discovery 24(2), 2012).
% Fields such as affiliation, interhash, intrahash and keyword are not standard
% BibTeX fields; standard styles ignore them, and they are kept as exported.
@article{thurau2012descriptive,
  author      = {Thurau, Christian and Kersting, Kristian and Wahabzada, Mirwaes and Bauckhage, Christian},
  title       = {Descriptive matrix factorization for sustainability: Adopting the principle of opposites},
  journal     = {Data Mining and Knowledge Discovery},
  volume      = 24,
  number      = 2,
  pages       = {325--354},
  year        = 2012,
  publisher   = {Springer Netherlands},
  issn        = {1384-5810},
  doi         = {10.1007/s10618-011-0216-z},
  url         = {http://dx.doi.org/10.1007/s10618-011-0216-z},
  affiliation = {Fraunhofer Institute for Intelligent Analysis and Information Systems IAIS, Sankt Augustin, Germany},
  keyword     = {Computer Science},
  interhash   = {457c57f054fea45dcbc8447263591d97},
  intrahash   = {387f4e1711d7065bd5a94455aeae1957},
  abstract    = {Climate change, the global energy footprint, and strategies for sustainable development have become topics of considerable political and public interest. The public debate is informed by an exponentially growing amount of data and there are diverse partisan interest when it comes to interpretation. We therefore believe that data analysis methods are called for that provide results which are intuitively understandable even to non-experts. Moreover, such methods should be efficient so that non-experts users can perform their own analysis at low expense in order to understand the effects of different parameters and influential factors. In this paper, we discuss a new technique for factorizing data matrices that meets both these requirements. The basic idea is to represent a set of data by means of convex combinations of extreme data points. This often accommodates human cognition. In contrast to established factorization methods, the approach presented in this paper can also determine over-complete bases. At the same time, convex combinations allow for highly efficient matrix factorization. Based on techniques adopted from the field of distance geometry, we derive a linear time algorithm to determine suitable basis vectors for factorization.
By means of the example of several environmental and developmental data sets we discuss the performance and characteristics of the proposed approach and validate that significant efficiency gains are obtainable without performance decreases compared to existing convexity constrained approaches.},
}

% Workshop paper (LWA 2010, Kassel). Inherits any missing fields from the
% parent entry named by crossref (lwa2010), which must be defined elsewhere;
% under classic BibTeX the parent has to appear after this entry.
% NOTE(review): "Massiv" in the title is kept exactly as found; confirm
% against the published paper before changing it.
% The end, room, session, start, track and privnote fields are non-standard
% conference-schedule and private annotations; standard styles ignore them.
@inproceedings{kersting2010convex,
  author    = {Kersting, Kristian and Wahabzada, Mirwaes and Thurau, Christian and Bauckhage, Christian},
  title     = {Convex {NMF} on Non-Convex Massiv Data},
  booktitle = {Proceedings of LWA2010 - Workshop-Woche: Lernen, Wissen {\&} Adaptivitaet},
  editor    = {Atzm{\"u}ller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  address   = {Kassel, Germany},
  year      = 2010,
  crossref  = {lwa2010},
  url       = {http://www.kde.cs.uni-kassel.de/conf/lwa10/papers/kdml5.pdf},
  track     = {kdml},
  session   = {kdml3},
  room      = {0446},
  start     = {2010-10-06 09:22:30},
  end       = {2010-10-06 09:45:00},
  interhash = {9513ef5606b53314806fa4ad6507e819},
  intrahash = {5a46921f58af4fc313d8b60f38859a57},
  privnote  = {Ansatz, um "Base-Vectors" herauszufinden, die eine grosse Datenmenge an Vektoren beschreiben. Coole Arbeit! Koennte man das zum Tag-Clustern verwenden?},
  abstract  = {We present an extension of convex-hull nonnegative matrix factorization (CH-NMF) which was recently proposed as a large scale variant of convex non-negative matrix factorization (CNMF) or Archetypal Analysis (AA). CH-NMF factorizes a non-negative data matrix V into two non-negative matrix factors V = WH such that the columns of W are convex combinations of certain data points so that they are readily interpretable to data analysts. There is, however, no free lunch: imposing convexity constraints on W typically prevents adaptation to intrinsic, low dimensional structures in the data. Alas, in cases where the data is distributed in a nonconvex manner or consists of mixtures of lower dimensional convex distributions, the cluster representatives obtained from CH-NMF will be less meaningful.
In this paper, we present a hierarchical CH-NMF that automatically adapts to internal structures of a data set, hence it yields meaningful and interpretable clusters for non-convex data sets. This is also conformed by our extensive evaluation on DBLP publication records of 760,000 authors, 4,000,000 images harvested from the web, and 150,000,000 votes on World of Warcraft guilds.},
}