@misc{ugander2011anatomy, abstract = {We study the structure of the social graph of active Facebook users, the largest social network ever analyzed. We compute numerous features of the graph including the number of users and friendships, the degree distribution, path lengths, clustering, and mixing patterns. Our results center around three main observations. First, we characterize the global structure of the graph, determining that the social network is nearly fully connected, with 99.91% of individuals belonging to a single large connected component, and we confirm the "six degrees of separation" phenomenon on a global scale. Second, by studying the average local clustering coefficient and degeneracy of graph neighborhoods, we show that while the Facebook graph as a whole is clearly sparse, the graph neighborhoods of users contain surprisingly dense structure. Third, we characterize the assortativity patterns present in the graph by studying the basic demographic and network properties of users. We observe clear degree assortativity and characterize the extent to which "your friends have more friends than you". Furthermore, we observe a strong effect of age on friendship preferences as well as a globally modular community structure driven by nationality, but we do not find any strong gender homophily. We compare our results with those from smaller social networks and find mostly, but not entirely, agreement on common structural network characteristics.}, author = {Ugander, Johan and Karrer, Brian and Backstrom, Lars and Marlow, Cameron}, interhash = {968abebf69b5959d2837eefcda3a8a32}, intrahash = {efad3d029704f09829373a443eeefdde}, note = {cite arxiv:1111.4503Comment: 17 pages, 9 figures, 1 table}, title = {The Anatomy of the Facebook Social Graph}, url = {http://arxiv.org/abs/1111.4503}, year = 2011 } @inproceedings{backstrom2011supervised, abstract = {Predicting the occurrence of links is a fundamental problem in networks. In the link prediction problem we are given a snapshot of a network and would like to infer which interactions among existing members are likely to occur in the near future or which existing interactions are we missing. Although this problem has been extensively studied, the challenge of how to effectively combine the information from the network structure with rich node and edge attribute data remains largely open.

We develop an algorithm based on Supervised Random Walks that naturally combines the information from the network structure with node and edge level attributes. We achieve this by using these attributes to guide a random walk on the graph. We formulate a supervised learning task where the goal is to learn a function that assigns strengths to edges in the network such that a random walker is more likely to visit the nodes to which new links will be created in the future. We develop an efficient training algorithm to directly learn the edge strength estimation function.

Our experiments on the Facebook social graph and large collaboration networks show that our approach outperforms state-of-the-art unsupervised approaches as well as approaches that are based on feature extraction.}, acmid = {1935914}, address = {New York, NY, USA}, author = {Backstrom, Lars and Leskovec, Jure}, booktitle = {Proceedings of the fourth ACM international conference on Web search and data mining}, doi = {10.1145/1935826.1935914}, interhash = {94f21249839cf875da4ad8842cd37d15}, intrahash = {999a159de862039db86fe74f808526e3}, isbn = {978-1-4503-0493-1}, location = {Hong Kong, China}, numpages = {10}, pages = {635--644}, publisher = {ACM}, series = {WSDM '11}, title = {Supervised random walks: predicting and recommending links in social networks}, url = {http://doi.acm.org/10.1145/1935826.1935914}, year = 2011 } @inproceedings{Backstrom:2007:WAT:1242572.1242598, abstract = {In a social network, nodes correspond topeople or other social entities, and edges correspond to social links between them. In an effort to preserve privacy, the practice of anonymization replaces names with meaningless unique identifiers. We describe a family of attacks such that even from a single anonymized copy of a social network, it is possible for an adversary to learn whether edges exist or not between specific targeted pairs of nodes.}, acmid = {1242598}, address = {New York, NY, USA}, author = {Backstrom, Lars and Dwork, Cynthia and Kleinberg, Jon}, booktitle = {Proceedings of the 16th international conference on World Wide Web}, doi = {10.1145/1242572.1242598}, interhash = {aa7d0f96c372d2c03d228f27a7f4b66b}, intrahash = {913059fcbf0453c60ff8b79e2705742c}, isbn = {978-1-59593-654-7}, location = {Banff, Alberta, Canada}, numpages = {10}, pages = {181--190}, publisher = {ACM}, series = {WWW '07}, title = {Wherefore art thou r3579x?: anonymized social networks, hidden patterns, and structural steganography}, url = {http://doi.acm.org/10.1145/1242572.1242598}, year = 2007 } @inproceedings{1557077, abstract = {Tracking new topics, ideas, and "memes" across the Web has been an issue of considerable interest. Recent work has developed methods for tracking topic shifts over long time scales, as well as abrupt spikes in the appearance of particular named entities. However, these approaches are less well suited to the identification of content that spreads widely and then fades over time scales on the order of days - the time scale at which we perceive news and events. We develop a framework for tracking short, distinctive phrases that travel relatively intact through on-line text; developing scalable algorithms for clustering textual variants of such phrases, we identify a broad class of memes that exhibit wide spread and rich variation on a daily basis. As our principal domain of study, we show how such a meme-tracking approach can provide a coherent representation of the news cycle - the daily rhythms in the news media that have long been the subject of qualitative interpretation but have never been captured accurately enough to permit actual quantitative analysis. We tracked 1.6 million mainstream media sites and blogs over a period of three months with the total of 90 million articles and we find a set of novel and persistent temporal patterns in the news cycle. In particular, we observe a typical lag of 2.5 hours between the peaks of attention to a phrase in the news media and in blogs respectively, with divergent behavior around the overall peak and a "heartbeat"-like pattern in the handoff between news and blogs. We also develop and analyze a mathematical model for the kinds of temporal variation that the system exhibits.}, address = {New York, NY, USA}, author = {Leskovec, Jure and Backstrom, Lars and Kleinberg, Jon}, booktitle = {KDD '09: Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining}, doi = {http://doi.acm.org/10.1145/1557019.1557077}, interhash = {f60a96f8adb340b62bacbc90fdb3e069}, intrahash = {051df7b09db1d7806909cc22c1a362c8}, isbn = {978-1-60558-495-9}, location = {Paris, France}, pages = {497--506}, publisher = {ACM}, title = {Meme-tracking and the dynamics of the news cycle}, url = {http://portal.acm.org/citation.cfm?id=1557077}, year = 2009 } @inproceedings{conf/kdd/BackstromHKL06, author = {Backstrom, Lars and Huttenlocher, Daniel P. and Kleinberg, Jon M. and Lan, Xiangyang}, booktitle = {KDD}, crossref = {conf/kdd/2006}, date = {2006-10-05}, editor = {Eliassi-Rad, Tina and Ungar, Lyle H. and Craven, Mark and Gunopulos, Dimitrios}, ee = {http://doi.acm.org/10.1145/1150402.1150412}, interhash = {a3cda51b88fd4ff49632bd6b393b5b6b}, intrahash = {d7f58740d7b63881ba4993d0a576be94}, isbn = {1-59593-339-5}, pages = {44-54}, publisher = {ACM}, title = {Group formation in large social networks: membership, growth, and evolution.}, url = {http://dblp.uni-trier.de/db/conf/kdd/kdd2006.html#BackstromHKL06}, year = 2006 }