% Bibliography entries for publications co-authored by Lise Getoor.
% One field per line; citation keys are unchanged so existing citations keep resolving.

% Single spelling of the journal shared by the three SIGKDD Explorations entries below.
@string{sigkddexplorations = {ACM SIGKDD Explorations Newsletter}}

% arXiv identifier lives in eprint/archiveprefix; the note keeps only the venue remark.
@misc{kang2013lalda,
  abstract      = {Social media users have finite attention which limits the number of incoming messages from friends they can process. Moreover, they pay more attention to opinions and recommendations of some friends more than others. In this paper, we propose LA-LDA, a latent topic model which incorporates limited, non-uniformly divided attention in the diffusion process by which opinions and information spread on the social network. We show that our proposed model is able to learn more accurate user models from users' social network and item adoption behavior than models which do not take limited attention into account. We analyze voting on news items on the social news aggregator Digg and show that our proposed model is better able to predict held out votes than alternative models. Our study demonstrates that psycho-socially motivated models have better ability to describe and predict observed behavior than models which only consider topics.},
  archiveprefix = {arXiv},
  author        = {Kang, Jeon-Hyung and Lerman, Kristina and Getoor, Lise},
  eprint        = {1301.6277},
  interhash     = {18a900ae003a2aedb3879fcaaa4e89b6},
  intrahash     = {84ae222ddb615ca8ae9421a29c07a8f6},
  note          = {The 2013 International Conference on Social Computing, Behavioral-Cultural Modeling, \& Prediction (SBP 2013)},
  title         = {{LA-LDA}: A Limited Attention Topic Model for Social Recommendation},
  url           = {http://arxiv.org/abs/1301.6277},
  year          = {2013},
}

@article{bhattacharya2007collective,
  abstract   = {Many databases contain uncertain and imprecise references to real-world entities. The absence of identifiers for the underlying entities often results in a database which contains multiple references to the same entity. This can lead not only to data redundancy, but also inaccuracies in query processing and knowledge extraction. These problems can be alleviated through the use of entity resolution. Entity resolution involves discovering the underlying entities and mapping each database reference to these entities. Traditionally, entities are resolved using pairwise similarity over the attributes of references. However, there is often additional relational information in the data. Specifically, references to different entities may cooccur. In these cases, collective entity resolution, in which entities for cooccurring references are determined jointly rather than independently, can improve entity resolution accuracy. We propose a novel relational clustering algorithm that uses both attribute and relational information for determining the underlying domain entities, and we give an efficient implementation. We investigate the impact that different relational similarity measures have on entity resolution quality. We evaluate our collective entity resolution algorithm on multiple real-world databases. We show that it improves entity resolution performance over both attribute-based baselines and over algorithms that consider relational information but do not resolve entities collectively. In addition, we perform detailed experiments on synthetically generated data to identify data characteristics that favor collective relational resolution over purely attribute-based algorithms.},
  acmid      = {1217304},
  address    = {New York, NY, USA},
  articleno  = {5},
  author     = {Bhattacharya, Indrajit and Getoor, Lise},
  doi        = {10.1145/1217299.1217304},
  interhash  = {3fdd3dfe026b0f18c7b9927ebe471cf1},
  intrahash  = {5c65a3d97ac6933ca2f63480630d99cf},
  issn       = {1556-4681},
  issue_date = {March 2007},
  journal    = {ACM Transactions on Knowledge Discovery from Data},
  month      = mar,
  number     = {1},
  publisher  = {ACM},
  title      = {Collective entity resolution in relational data},
  url        = {http://doi.acm.org/10.1145/1217299.1217304},
  volume     = {1},
  year       = {2007},
}

@inproceedings{plangprasopchok2010probabilistic,
  abstract      = {Learning structured representations has emerged as an important problem in many domains, including document and Web data mining, bioinformatics, and image analysis. One approach to learning complex structures is to integrate many smaller, incomplete and noisy structure fragments. In this work, we present an unsupervised probabilistic approach that extends affinity propagation to combine the small ontological fragments into a collection of integrated, consistent, and larger folksonomies. This is a challenging task because the method must aggregate similar structures while avoiding structural inconsistencies and handling noise. We validate the approach on a real-world social media dataset, comprised of shallow personal hierarchies specified by many individual users, collected from the photosharing website Flickr. Our empirical results show that our proposed approach is able to construct deeper and denser structures, compared to an approach using only the standard affinity propagation algorithm. Additionally, the approach yields better overall integration quality than a state-of-the-art approach based on incremental relational clustering.},
  archiveprefix = {arXiv},
  author        = {Plangprasopchok, Anon and Lerman, Kristina and Getoor, Lise},
  booktitle     = {Proceedings of the 4th {ACM} Web Search and Data Mining Conference ({WSDM})},
  eprint        = {1011.3557},
  interhash     = {826359ec25dcd228ad3ef46dcc6d26c5},
  intrahash     = {455bb173bb33af58bc8aaed48d8a8513},
  title         = {A Probabilistic Approach for Learning Folksonomies from Structured Data},
  url           = {http://arxiv.org/abs/1011.3557},
  year          = {2010},
}

% The crossref parent (conf/kdd/2010) is not defined in this part of the file;
% classic BibTeX needs it to appear after this entry.
@inproceedings{plangprasopchok2010growing,
  abstract  = {Many social Web sites allow users to annotate the content with descriptive metadata, such as tags, and more recently to organize content hierarchically. These types of structured metadata provide valuable evidence for learning how a community organizes knowledge. For instance, we can aggregate many personal hierarchies into a common taxonomy, also known as a folksonomy, that will aid users in visualizing and browsing social content, and also to help them in organizing their own content. However, learning from social metadata presents several challenges, since it is sparse, shallow, ambiguous, noisy, and inconsistent. We describe an approach to folksonomy learning based on relational clustering, which exploits structured metadata contained in personal hierarchies. Our approach clusters similar hierarchies using their structure and tag statistics, then incrementally weaves them into a deeper, bushier tree. We study folksonomy learning using social metadata extracted from the photo-sharing site Flickr, and demonstrate that the proposed approach addresses the challenges. Moreover, comparing to previous work, the approach produces larger, more accurate folksonomies, and in addition, scales better.},
  author    = {Plangprasopchok, Anon and Lerman, Kristina and Getoor, Lise},
  booktitle = {KDD},
  crossref  = {conf/kdd/2010},
  doi       = {10.1145/1835804.1835924},
  editor    = {Rao, Bharat and Krishnapuram, Balaji and Tomkins, Andrew and Yang, Qiang},
  ee        = {http://doi.acm.org/10.1145/1835804.1835924},
  file      = {plangprasopchok2010growing.pdf:plangprasopchok2010growing.pdf:PDF},
  groups    = {public},
  interhash = {d8738d21c4d25559d7dbcc0aa6647223},
  intrahash = {11fbd76695bf0de7499c1721723661fe},
  isbn      = {978-1-4503-0055-1},
  pages     = {949--958},
  publisher = {ACM},
  timestamp = {2011-02-02 15:03:59},
  title     = {Growing a tree in the forest: constructing folksonomies by integrating structured metadata},
  url       = {http://dblp.uni-trier.de/db/conf/kdd/kdd2010.html#PlangprasopchokLG10},
  username  = {dbenz},
  year      = {2010},
}

% Auto-generated key kept as-is so existing citations still resolve.
@inproceedings{noauthororeditorpipeline,
  author    = {Namata, Galileo Mark and Getoor, Lise},
  booktitle = {7th International Workshop on Mining and Learning with Graphs},
  interhash = {c341a683d8bac1896a962d8907284b3d},
  intrahash = {88f20464c94d29ad9d2f2cd9ba82d3f9},
  title     = {A Pipeline Approach to Graph Identification},
  url       = {http://linqs.cs.umd.edu/basilic/web/Publications/},
  year      = {2009},
}

@inproceedings{zheleva2009join,
  abstract  = {In order to address privacy concerns, many social media websites allow users to hide their personal profiles from the public. In this work, we show how an adversary can exploit an online social network with a mixture of public and private user profiles to predict the private attributes of users. We map this problem to a relational classification problem and we propose practical models that use friendship and group membership information (which is often not hidden) to infer sensitive attributes. The key novel idea is that in addition to friendship links, groups can be carriers of significant information. We show that on several well-known social media sites, we can easily and accurately recover the information of private-profile users. To the best of our knowledge, this is the first work that uses link-based and group-based classification to study privacy implications in social networks with mixed public and private user profiles.},
  address   = {New York, NY, USA},
  author    = {Zheleva, Elena and Getoor, Lise},
  booktitle = {WWW '09: Proceedings of the 18th International Conference on World Wide Web},
  doi       = {10.1145/1526709.1526781},
  interhash = {4726d0a13b0337998d6d0f54fc5c26e9},
  intrahash = {25e6c200ace070886f01d7d30957b504},
  isbn      = {978-1-60558-487-4},
  location  = {Madrid, Spain},
  month     = apr,
  pages     = {531--540},
  publisher = {ACM},
  title     = {To join or not to join: the illusion of privacy in social networks with mixed public and private user profiles},
  url       = {http://portal.acm.org/citation.cfm?id=1526709.1526781&coll=GUIDE&dl=acm&type=series&idx=SERIES968&part=series&WantType=Proceedings&title=WWW},
  year      = {2009},
}

% The next two entries describe the same work (identical interhash and intrahash).
% Both keys are kept so that citations using either one continue to resolve.
@article{gd05link,
  address   = {New York, NY, USA},
  author    = {Getoor, Lise and Diehl, Christopher P.},
  doi       = {10.1145/1117454.1117456},
  interhash = {d71ff8114c8b062f36d58ca649bc2a04},
  intrahash = {ac02f1d7dea7a106bc4103c8a9ec4aef},
  journal   = sigkddexplorations,
  number    = {2},
  pages     = {3--12},
  publisher = {ACM Press},
  title     = {Link mining: a survey},
  url       = {http://www.cpdiehl.org/lmsurvey.pdf},
  volume    = {7},
  year      = {2005},
}

@article{1117456,
  address   = {New York, NY, USA},
  author    = {Getoor, Lise and Diehl, Christopher P.},
  doi       = {10.1145/1117454.1117456},
  interhash = {d71ff8114c8b062f36d58ca649bc2a04},
  intrahash = {ac02f1d7dea7a106bc4103c8a9ec4aef},
  journal   = sigkddexplorations,
  number    = {2},
  pages     = {3--12},
  publisher = {ACM Press},
  title     = {Link mining: a survey},
  url       = {http://www.cpdiehl.org/lmsurvey.pdf},
  volume    = {7},
  year      = {2005},
}

% NOTE(review): the ee value looks like a truncated DOI, so no doi field is derived from it - confirm.
@article{DBLP:journals/sigkdd/HothoSG04,
  author    = {Hotho, Andreas and Sure, York and Getoor, Lise},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee        = {http://doi.acm.org/10.1145/1046482},
  interhash = {02a216da151c9bd84ec4c131e1a43f89},
  intrahash = {1aae46b03f32f0d69caa735abe81825e},
  journal   = sigkddexplorations,
  number    = {2},
  pages     = {142--143},
  title     = {A workshop report: mining for and from the {Semantic Web} at {KDD} 2004},
  volume    = {6},
  year      = {2004},
}