% Bibliography section: ontologies, community detection and clustering on
% large data sets. One field per line; interhash/intrahash and the other
% tool-specific fields are ignored by standard styles and are kept as exported.

@inproceedings{Ho98,
  author    = {Hovy, E. H.},
  title     = {Combining and Standardizing Large-Scale, Practical Ontologies for Machine Translation and Other Uses},
  booktitle = {Proceedings of the 1st International Conference on Language Resources and Evaluation ({LREC})},
  address   = {Granada},
  year      = {1998},
  isbn      = {3-540-41066-X},
  url       = {http://www.isi.edu/natural-language/people/hovy/publications.html},
  interhash = {c8f274dc0380d76b5bb179152c47a959},
  intrahash = {839e408657e0d8d6ca0b66fdae29063f}
}

% arXiv preprint of the Physical Review E article listed directly below.
@misc{citeulike:95936,
  author               = {Clauset, Aaron and Newman, M. E. J. and Moore, Cristopher},
  title                = {Finding community structure in very large networks},
  month                = aug,
  year                 = {2004},
  eprint               = {cond-mat/0408187},
  archiveprefix        = {arXiv},
  url                  = {http://arxiv.org/abs/cond-mat/0408187},
  abstract             = {The discovery and analysis of community structure in networks is a topic of considerable recent interest within the physics community, but most methods proposed so far are unsuitable for very large networks because of their computational cost. Here we present a hierarchical agglomeration algorithm for detecting community structure which is faster than many competing algorithms: its running time on a network with n vertices and m edges is $O(m d \log n)$ where d is the depth of the dendrogram describing the community structure. Many real-world networks are sparse and hierarchical, with $m \sim n$ and $d \sim \log n$, in which case our algorithm runs in essentially linear time, $O(n \log^2 n)$. As an example of the application of this algorithm we use it to analyze a network of items for sale on the web-site of a large online retailer, items in the network being linked if they are frequently purchased by the same buyer. The network has more than 400,000 vertices and 2 million edges. We show that our algorithm can extract meaningful communities from this network, revealing large-scale patterns present in the purchasing habits of customers.},
  citeulike-article-id = {95936},
  priority             = {0},
  interhash            = {2c68e3c981a00380692a3b0b661d7cfd},
  intrahash            = {f9a12630a6d31d576ea5222219a4cf0b}
}

% This key was defined twice in this section with identical bibliographic
% data; the two records differed only in url and intrahash. They are merged
% here so the key is unique, and the second record's values are preserved in
% the ignored "-alt" fields.
% NOTE(review): the doi follows the usual APS pattern for volume 70,
% article 066111 -- confirm against the publisher record.
@article{clauset-2004-70,
  author        = {Clauset, Aaron and Newman, M. E. J. and Moore, Cristopher},
  title         = {Finding community structure in very large networks},
  journal       = {Physical Review E},
  volume        = {70},
  pages         = {066111},
  year          = {2004},
  doi           = {10.1103/PhysRevE.70.066111},
  url           = {http://www.citebase.org/cgi-bin/citations?id=oai:arXiv.org:cond-mat/0408187},
  url-alt       = {http://www.citebase.org/abstract?id=oai:arXiv.org:cond-mat/0408187},
  interhash     = {2c68e3c981a00380692a3b0b661d7cfd},
  intrahash     = {a35d69f1d41a6cdd0632c5e1cadb4d44},
  intrahash-alt = {0ea285bfc0f5a46ffec8a213e5133ba6}
}

@inproceedings{kubicaKgroups,
  author    = {Kubica, Jeremy and Moore, Andrew and Schneider, Jeff},
  title     = {Tractable Group Detection on Large Link Data Sets},
  booktitle = {The Third IEEE International Conference on Data Mining},
  editor    = {Wu, Xindong and Tuzhilin, Alex and Shavlik, Jude},
  pages     = {573--576},
  publisher = {IEEE Computer Society},
  month     = nov,
  year      = {2003},
  interhash = {0d7be00e85fa41a082bab454c0665126},
  intrahash = {a2602433bd2f144216fdddd3704d487f}
}

@techreport{Kubica_2003_4489,
  author      = {Kubica, Jeremy Martin and Moore, Andrew and Schneider, Jeff},
  title       = {K-groups: Tractable Group Detection on Large Link Data Sets},
  institution = {Robotics Institute, Carnegie Mellon University},
  number      = {CMU-RI-TR-03-32},
  address     = {Pittsburgh, PA},
  month       = sep,
  year        = {2003},
  url         = {http://www.ri.cmu.edu/pubs/pub_4489.html},
  abstract    = {Discovering underlying structure from co-occurrence data is an important task in many fields, including: insurance, intelligence, criminal investigation, epidemiology, human resources, and marketing. For example a store may wish to identify underlying sets of items purchased together or a human resources department may wish to identify groups of employees that collaborate with each other. Previously Kubica et al. presented the group detection algorithm (GDA) - an algorithm for finding underlying groupings of entities from co-occurrence data. This algorithm is based on a probabilistic generative model and produces coherent groups that are consistent with prior knowledge. Unfortunately, the optimization used in GDA is slow, making it potentially infeasible for many real world data sets. To this end, we present k-groups - an algorithm that uses an approach similar to that of k-means (hard clustering and localized updates) to significantly accelerate the discovery of the underlying groups while retaining GDA's probabilistic model. In addition, we show that k-groups is guaranteed to converge to a local minimum. We also compare the performance of GDA and k-groups on several real world and artificial data sets, showing that k-groups' sacrifice in solution quality is significantly offset by its increase in speed. This trade-off makes group detection tractable on significantly larger data sets.},
  interhash   = {cecbc69533ab6d63fd478c7a9c7651a1},
  intrahash   = {3a4df0e814c3a1b125e3d403abe48733}
}

@inproceedings{zhang96birch,
  author    = {Zhang, Tian and Ramakrishnan, Raghu and Livny, Miron},
  title     = {{BIRCH}: an efficient data clustering method for very large databases},
  booktitle = {Proceedings of the 1996 ACM SIGMOD International Conference on Management of Data (SIGMOD'96)},
  pages     = {103--114},
  year      = {1996},
  url       = {http://citeseer.ist.psu.edu/zhang96birch.html},
  interhash = {bd3d8e33e8785ecf66408081db016ca4},
  intrahash = {d8ede3f66d485d95578bdc3eeda11fc3}
}

% The second author was exported as "0010, Wei Wang" (a numeric homonym
% suffix parsed as a surname); the citation key confirms the surname is Wang.
% The export's 2009-06-28 "date" is kept as "timestamp" so that biblatex does
% not let it override the 2002 publication year (presumably it is the
% database record date -- TODO confirm). The doi is taken from the ee URL.
@inproceedings{conf/sigmod/WangWYY02,
  author    = {Wang, Haixun and Wang, Wei and Yang, Jiong and Yu, Philip S.},
  title     = {Clustering by pattern similarity in large data sets},
  booktitle = {SIGMOD Conference},
  editor    = {Franklin, Michael J. and Moon, Bongki and Ailamaki, Anastassia},
  pages     = {394--405},
  publisher = {ACM},
  year      = {2002},
  isbn      = {1-58113-497-5},
  doi       = {10.1145/564691.564737},
  ee        = {http://doi.acm.org/10.1145/564691.564737},
  url       = {http://dblp.uni-trier.de/db/conf/sigmod/sigmod2002.html#WangWYY02},
  crossref  = {conf/sigmod/2002},
  timestamp = {2009-06-28},
  interhash = {9da0e61a2ac3ac371edfb251fbbfc2ae},
  intrahash = {5ad941d8f0a06bb5e570e22a8cc58d92}
}

@book{cullum2002lanczos,
  author    = {Cullum, J. K. and Willoughby, R. A.},
  title     = {{Lanczos} algorithms for large symmetric eigenvalue computations: Theory},
  publisher = {Society for Industrial Mathematics},
  year      = {2002},
  url       = {http://scholar.google.de/scholar.bib?q=info:zshJq2GVHO8J:scholar.google.com/&output=citation&hl=de&ct=citation&cd=0},
  interhash = {cfc97672c78c590391fc6f731d48ddc2},
  intrahash = {b6fa6a375178396d472af2006fb21fc8}
}

% The entry opened on the next line continues beyond this section and is
% left exactly as found.
@article{356494, abstract = {
|
||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||
REFERENCES
INDEX TERMS
Primary Classification:
Additional Classification:
Keywords:
Peer to Peer - Readers of this Article have also read:
|