% Bibliography entries on community structure, link analysis and spectral clustering.
% Text outside of entries is ignored by BibTeX, so these percent lines act as comments.

% Java, Joshi, Finin: conference paper, ICWSM 2008.
@inproceedings{Approximating2008Java,
  author    = {Java, Akshay and Joshi, Anupam and Finin, Tim},
  title     = {Approximating the Community Structure of the Long Tail},
  booktitle = {Proceedings of the Second International Conference on Weblogs and Social Media ({ICWSM} 2008)},
  publisher = {AAAI Press},
  year      = 2008,
  date      = {2008},
  url       = {http://ebiquity.umbc.edu/paper/html/id/381/Approximating-the-Community-Structure-of-the-Long-Tail},
  abstract  = {In many social media applications, a small fraction of the members are highly linked while most are sparsely connected to the network. Such a skewed distribution is sometimes referred to as the ``long tail''. Popular applications like meme trackers and content aggregators mine for information from only the popular blogs located at the head of this curve. On the other hand, the long tail contains large volumes of interesting information and niches. The question we address in this work is how best to approximate the community membership of entities in the long tail using only a small percentage of the entire graph structure. Our technique utilizes basic linear algebra manipulations and spectral methods. It has the advantage of quickly and efficiently finding a reasonable approximation of the community structure of the overall network. Such a method has significant applications in blog analysis engines as well as social media monitoring tools in general.},
  interhash = {ede357e110fee8803dc181d262f30087},
  intrahash = {386f36679c111f30e37ced272d5b355c},
}

% Ke, Kanade: conference paper, CVPR 2004 (volume 2), imported from DBLP.
@inproceedings{conf/cvpr/KeK04,
  author    = {Ke, Qifa and Kanade, Takeo},
  title     = {Robust Subspace Clustering by Combined Use of {kNND} Metric and {SVD} Algorithm},
  booktitle = {{CVPR} (2)},
  pages     = {592--599},
  year      = 2004,
  date      = {2004-10-25},
  url       = {http://dblp.uni-trier.de/db/conf/cvpr/cvpr2004-2.html#KeK04},
  ee        = {http://csdl.computer.org/comp/proceedings/cvpr/2004/2158/02/215820592abs.htm},
  interhash = {44d129cdb2b7b706285653bd5dd1ed7f},
  intrahash = {aab7d7e1f1285bb42e370f3921122018},
}

% Kleinberg: journal article, Journal of the ACM 46(5), 1999. Abstract is truncated at the source.
@article{kleinberg99authoritative,
  author               = {Kleinberg, Jon M.},
  title                = {Authoritative Sources in a Hyperlinked Environment},
  journal              = {Journal of the ACM},
  volume               = 46,
  number               = 5,
  pages                = {604--632},
  month                = sep,
  year                 = 1999,
  doi                  = {10.1145/324133.324140},
  url                  = {http://dx.doi.org/10.1145/324133.324140},
  abstract             = {The network structure of a hyperlinked environment can be a rich source of information about the content of the environment, provided we have effective means for understanding it. We develop a set of algorithmic tools for extracting information from the link structures of such environments, and report on experiments that demonstrate their effectiveness in a variety of contexts on the World Wide Web.
The central issue we address within our framework is the distillation of broad search topics,...},
  citeulike-article-id = {1115},
  citeulike-linkout-0  = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.32.9983},
  citeulike-linkout-1  = {http://citeseer.nj.nec.com/kleinberg99authoritative.html},
  citeulike-linkout-2  = {http://dx.doi.org/10.1145/324133.324140},
  posted-at            = {2005-07-07 17:29:16},
  priority             = {0},
  interhash            = {48a48add3cba613f07df1e9b56278b85},
  intrahash            = {c86549355475331f563d0a3ba7816dab},
}

% Gibson, Kleinberg, Raghavan: conference paper, 1998. Abstract is truncated at the source.
% NOTE(review): booktitle was absent from the import and has been supplied; confirm against the DBLP record.
@inproceedings{Gibson98clusteringcategorical,
  author    = {Gibson, David and Kleinberg, Jon and Raghavan, Prabhakar},
  title     = {Clustering Categorical Data: An Approach Based on Dynamical Systems},
  booktitle = {Proceedings of the 24th International Conference on Very Large Data Bases ({VLDB})},
  pages     = {311--322},
  year      = 1998,
  url       = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.43.8003},
  abstract  = {We describe a novel approach for clustering collections of sets, and its application to the analysis and mining of categorical data. By ``categorical data,'' we mean tables with fields that cannot be naturally ordered by a metric --- e.g., the names of producers of automobiles, or the names of products offered by a manufacturer. Our approach is based on an iterative method for assigning and propagating weights on the categorical values in a table; this facilitates a type of similarity measure arising from the cooccurrence of values in the dataset. Our techniques can be studied analytically in terms of certain types of non-linear dynamical systems. We discuss experiments on a variety of tables of synthetic and real data; we find that our iterative methods converge quickly to prominently correlated values of various categorical fields. 1 Introduction Much of the data in databases is categorical: fields in tables whose attributes cannot naturally be ordered as numerical values can.
The pro...},
  interhash = {1439dc731dbc3225e455c4cd4ec297b1},
  intrahash = {31bcdc070e056e9ba33ba155ebc9285d},
}

% Drineas, Frieze, Kannan, Vempala, Vinay: journal article, Machine Learning 56(1), 2004.
% NOTE(review): the url is a Google Scholar citation-export link, not the article itself; replace with a DOI when verified.
@article{drineas2004clustering,
  author    = {Drineas, P. and Frieze, A. and Kannan, R. and Vempala, S. and Vinay, V.},
  title     = {Clustering Large Graphs via the Singular Value Decomposition},
  journal   = {Machine Learning},
  volume    = 56,
  number    = 1,
  pages     = {9--33},
  publisher = {Springer},
  year      = 2004,
  url       = {http://scholar.google.de/scholar.bib?q=info:gQY9HvWhsJcJ:scholar.google.com/&output=citation&hl=de&ct=citation&cd=0},
  interhash = {74a3b1a8ba8bd61ccee643849d4605e8},
  intrahash = {3487832093c8bd1a4b5296e3db7d26dd},
}