% Bibliography entries on data mining and knowledge discovery (1994--2006).
%
% Conventions used in this file:
%   - one field per line, values delimited by braces;
%   - months use the standard three-letter macros (jan, feb, ...), unquoted;
%   - page ranges use a double hyphen;
%   - accented letters are written as braced LaTeX escapes so that they
%     survive re-encoding and sort correctly under classic BibTeX;
%   - the LNCS volume number lives in its own volume field, not in series;
%   - interhash/intrahash look like identifiers added by the export tool;
%     they are not standard fields and are preserved unchanged.

% Conference paper (ICDM 2006); carries both a DOI and a publisher URL.
@inproceedings{tatti2006dimension,
  abstract     = {Many 0/1 datasets have a very large number of variables; however, they are sparse and the dependency structure of the variables is simpler than the number of variables would suggest. Defining the effective dimensionality of such a dataset is a nontrivial problem. We consider the problem of defining a robust measure of dimension for 0/1 datasets, and show that the basic idea of fractal dimension can be adapted for binary data. However, as such the fractal dimension is difficult to interpret. Hence we introduce the concept of normalized fractal dimension. For a dataset D, its normalized fractal dimension counts the number of independent columns needed to achieve the unnormalized fractal dimension of D. The normalized fractal dimension measures the degree of dependency structure of the data. We study the properties of the normalized fractal dimension and discuss its computation. We give empirical results on the normalized fractal dimension, comparing it against PCA.},
  author       = {Tatti, N. and Mielik{\"a}inen, T. and Gionis, A. and Mannila, H.},
  booktitle    = {Proceedings of the Sixth IEEE International Conference on Data Mining (ICDM 2006)},
  doi          = {10.1109/ICDM.2006.167},
  interhash    = {5164cd6a09b802d14dce6d3947df60cd},
  intrahash    = {0a8ad03bc7d2d0d7d77ee73eede4ecc0},
  issn         = {1550-4786},
  month        = dec,
  organization = {IEEE},
  pages        = {603--612},
  title        = {What is the Dimension of Your Binary Data?},
  url          = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=4053086},
  year         = {2006},
}

% Conference paper (PODS'97).
@inproceedings{Gunopulos97a,
  author    = {Gunopulos, D. and Khardon, R. and Mannila, H. and Toivonen, H.},
  booktitle = {Proceedings of the 16th ACM SIGACT-SIGMOD-SIGART Symposium on Principles of Database Systems (PODS'97)},
  interhash = {25524743720ea6513223dce2de1696c6},
  intrahash = {7c848f3673236b109501141c448ba05e},
  month     = may,
  pages     = {209--216},
  publisher = {ACM Press},
  title     = {Data mining, hypergraph transversals, and machine learning},
  year      = {1997},
}

% Conference paper (ICDE'96). The first author's surname was stored with
% mis-encoded bytes; it is restored with a-umlaut and o-umlaut, matching the
% ae/oe transliteration used in the citation key.
@inproceedings{Haetoenen96,
  author    = {H{\"a}t{\"o}nen, K. and Klemettinen, M. and Mannila, H. and Ronkainen, P. and Toivonen, H.},
  booktitle = {Proceedings of the 12th International Conference on Data Engineering (ICDE'96)},
  interhash = {0b28b91f7bb58ebb9b2270dfd415102b},
  intrahash = {e50ef2122738aa406fea40abff93beb7},
  month     = feb,
  pages     = {115--122},
  publisher = {IEEE Computer Society Press},
  title     = {Knowledge discovery from telecommunication network alarm databases},
  year      = {1996},
}

% Conference paper (DEXA'97), published as LNCS volume 1308.
@inproceedings{Klemettinen97,
  author    = {Klemettinen, M. and Mannila, H. and Toivonen, H.},
  booktitle = {Proceedings of the 8th International Conference on Database and Expert Systems Applications (DEXA'97)},
  interhash = {67c69f1424a17566dc50f28bba37bb88},
  intrahash = {f602f9d2fa767af48670d454a0076b0b},
  month     = sep,
  pages     = {670--677},
  publisher = {Springer-Verlag},
  series    = {Lecture Notes in Computer Science},
  title     = {A data-mining methodology and its application to semi-automatic knowledge acquisition},
  volume    = {1308},
  year      = {1997},
}

% Conference paper (ICDT'97), published as LNCS volume 1186.
@inproceedings{Gunopulos97b,
  author    = {Gunopulos, D. and Mannila, H. and Saluja, S.},
  booktitle = {Proceedings of the 6th Biennial International Conference on Database Theory (ICDT'97)},
  interhash = {3cb316bfdbc20dce458caa31527d5f16},
  intrahash = {ea3e43fd09e33e643732767059ee9a34},
  month     = jan,
  pages     = {215--229},
  publisher = {Springer-Verlag},
  series    = {Lecture Notes in Computer Science},
  title     = {Discovering all most specific sentences by randomized algorithms},
  volume    = {1186},
  year      = {1997},
}

% Conference paper (CIKM'94).
@inproceedings{Klemettinen94,
  author    = {Klemettinen, M. and Mannila, H. and Ronkainen, P. and Toivonen, H. and Verkamo, A. I.},
  booktitle = {Proceedings of the 3rd International Conference on Information and Knowledge Management (CIKM'94)},
  interhash = {2ffb34191761cc3a31fc33a3b62266b0},
  intrahash = {c0e1a4b3cb31a35d300ef2c4ec7f6c66},
  month     = nov,
  pages     = {401--407},
  publisher = {ACM Press},
  title     = {Finding interesting rules from large sets of discovered association rules},
  year      = {1994},
}

% Conference paper (KDD'95).
@inproceedings{Mannila95,
  author    = {Mannila, H. and Toivonen, H. and Verkamo, A. I.},
  booktitle = {Proceedings of the 1st International Conference on Knowledge Discovery and Data Mining (KDD'95)},
  interhash = {d03bb2f89d936c6b5b79da5ecf07c177},
  intrahash = {3acd22af2c0b5a334547377be28d0d7d},
  month     = aug,
  pages     = {210--215},
  publisher = {AAAI Press},
  title     = {Discovering frequent episodes in sequences},
  year      = {1995},
}

% Conference paper (KDD'96).
@inproceedings{Mannila96b,
  author    = {Mannila, H. and Toivonen, H.},
  booktitle = {Proceedings of the 2nd International Conference on Knowledge Discovery and Data Mining (KDD'96)},
  interhash = {a52062a0a53d04fbbac5255ceb1c8d4f},
  intrahash = {baab204be045090ee21268747b60e4c0},
  month     = aug,
  pages     = {146--151},
  publisher = {AAAI Press},
  title     = {Discovering generalized episodes using minimal occurrences},
  year      = {1996},
}

% Workshop paper (AAAI'94 KDD workshop).
@inproceedings{Mannila94,
  author    = {Mannila, H. and Toivonen, H. and Verkamo, A. I.},
  booktitle = {AAAI'94 Workshop on Knowledge Discovery in Databases},
  interhash = {c6ef61a187c7a926628a034f95aa10e5},
  intrahash = {f8c7e9f86bcc70f9e5c940717c0b2eee},
  month     = jul,
  pages     = {181--192},
  publisher = {AAAI Press},
  title     = {Efficient algorithms for discovering association rules},
  year      = {1994},
}

% Conference paper (KDD'96); same proceedings as Mannila96b.
@inproceedings{Mannila96a,
  author    = {Mannila, H. and Toivonen, H.},
  booktitle = {Proceedings of the 2nd International Conference on Knowledge Discovery and Data Mining (KDD'96)},
  interhash = {1a14c093605457d9b86fdc2987800fae},
  intrahash = {b968c413c71176cd23ee6c69dcebe1ec},
  month     = aug,
  pages     = {189--194},
  publisher = {AAAI Press},
  title     = {Multiple uses of frequent sets and condensed representations},
  year      = {1996},
}

% Journal article. The editor and publisher fields are kept from the export
% even though standard article styles do not print them.
@article{Mannila97b,
  author    = {Mannila, H. and Toivonen, H.},
  editor    = {Fayyad, U. and Mannila, H. and Ramakrishnan, R.},
  interhash = {d50c17b81b91904aed719482ff653692},
  intrahash = {41fe893277feb80e653f199ad0d5cd56},
  journal   = {Data Mining and Knowledge Discovery},
  month     = sep,
  number    = {3},
  pages     = {241--258},
  publisher = {Kluwer Academic Publishers},
  title     = {Levelwise search and borders of theories in knowledge discovery},
  volume    = {1},
  year      = {1997},
}

% Workshop paper (ECML'95 MLnet workshop). The fourth author's surname is
% spelled with the same accents as in entry Haetoenen96.
@inproceedings{Toivonen95,
  author    = {Toivonen, H. and Klemettinen, M. and Ronkainen, P. and H{\"a}t{\"o}nen, K. and Mannila, H.},
  booktitle = {ECML'95 MLnet Workshop on Statistics, Machine Learning, and Knowledge Discovery in Databases},
  interhash = {7d1b841423e92b63588eb87e3b9b6324},
  intrahash = {64c06a361e931796d47725eb54a06bdc},
  month     = apr,
  pages     = {47--52},
  title     = {Pruning and grouping discovered association rules},
  year      = {1995},
}

% Conference paper (ICDT'97), published as LNCS volume 1186.
@inproceedings{Mannila97a,
  author    = {Mannila, H.},
  booktitle = {Proceedings of the 6th Biennial International Conference on Database Theory (ICDT'97)},
  interhash = {2992e5fd0f7b8767f483d23dba1528fe},
  intrahash = {7f3260c2630949cd2a420dc3b9dbe509},
  month     = jan,
  pages     = {41--55},
  publisher = {Springer-Verlag},
  series    = {Lecture Notes in Computer Science},
  title     = {Methods and problems in data mining},
  volume    = {1186},
  year      = {1997},
}

% Chapter in an edited book.
@incollection{Agrawal96,
  author    = {Agrawal, R. and Mannila, H. and Srikant, R. and Toivonen, H. and Verkamo, A. I.},
  booktitle = {Advances in Knowledge Discovery and Data Mining},
  editor    = {Fayyad, U. M. and Piatetsky-Shapiro, G. and Smyth, P. and Uthurusamy, R.},
  interhash = {a5d9c810f500dbab7c1358aa43c8b65a},
  intrahash = {7b5dd76b720883ae358d9e8b93833e7f},
  pages     = {307--328},
  publisher = {AAAI Press},
  title     = {Fast discovery of association rules},
  year      = {1996},
}

% Book. The export stored the city inside the publisher string
% ("Cambridge, MA: MIT Press") and, separately, a location of
% "Santa Barbara, CA" that contradicted it. The publisher name and its city
% are now split into publisher and address; the contradictory location
% value is dropped. NOTE(review): confirm against the printed imprint.
@book{Handetal01,
  address   = {Cambridge, MA},
  author    = {Hand, D. and Mannila, H. and Smyth, P.},
  interhash = {1d7f5d27a1dfea47e1ac543fcb027c0d},
  intrahash = {e2e195a4102d09f2ed4976bf102af01d},
  publisher = {MIT Press},
  title     = {Principles of Data Mining},
  year      = {2001},
}