@misc{goldenberg2009survey, abstract = {Networks are ubiquitous in science and have become a focal point for discussion in everyday life. Formal statistical models for the analysis of network data have emerged as a major topic of interest in diverse areas of study, and most of these involve a form of graphical representation. Probability models on graphs date back to 1959. Along with empirical studies in social psychology and sociology from the 1960s, these early works generated an active network community and a substantial literature in the 1970s. This effort moved into the statistical literature in the late 1970s and 1980s, and the past decade has seen a burgeoning network literature in statistical physics and computer science. The growth of the World Wide Web and the emergence of online networking communities such as Facebook, MySpace, and LinkedIn, and a host of more specialized professional network communities has intensified interest in the study of networks and network data. Our goal in this review is to provide the reader with an entry point to this burgeoning literature. We begin with an overview of the historical development of statistical network modeling and then we introduce a number of examples that have been studied in the network literature. Our subsequent discussion focuses on a number of prominent static and dynamic network models and their interconnections. We emphasize formal model descriptions, and pay special attention to the interpretation of parameters and their estimation. 
We end with a description of some open problems and challenges for machine learning and statistics.}, author = {Goldenberg, Anna and Zheng, Alice X and Fienberg, Stephen E and Airoldi, Edoardo M}, interhash = {bab22de06306d84cf357aadf48982d87}, intrahash = {5e341981218d7cd89416c3371d56c794}, note = {arXiv:0912.5410. Comment: 96 pages, 14 figures, 333 references}, title = {A survey of statistical network models}, url = {http://arxiv.org/abs/0912.5410}, year = 2009 } @inproceedings{Sautter:2012:IBR:2403832.2403883, abstract = {Parsing details like author names and titles out of bibliographic references of scientific publications is an important issue. However, most existing techniques are tailored to the highly standardized reference styles used in the last two to three decades. Their performance tends to degrade when faced with the wider variety of reference styles used in older, historic publications. Thus, existing techniques are of limited use when creating comprehensive bibliographies covering both historic and contemporary scientific publications. This paper presents RefParse, a generic approach to bibliographic reference parsing that is independent of any specific reference style. Its core feature is an inference mechanism that exploits the regularities inherent in any list of references to deduce its format. 
Our evaluation shows that RefParse outperforms existing parsers both for contemporary and for historic reference lists.}, acmid = {2403883}, address = {Berlin, Heidelberg}, author = {Sautter, Guido and B{\"o}hm, Klemens}, booktitle = {Proceedings of the Second International Conference on Theory and Practice of Digital Libraries}, doi = {10.1007/978-3-642-33290-6_40}, interhash = {20fe241af3945dca2e242ae72eae05ad}, intrahash = {ce9a27e85a0cc6bef109d5130e7ed1ea}, isbn = {978-3-642-33289-0}, location = {Paphos, Cyprus}, numpages = {13}, pages = {370--382}, publisher = {Springer-Verlag}, series = {TPDL'12}, title = {Improved Bibliographic Reference Parsing Based on Repeated Patterns}, url = {http://dx.doi.org/10.1007/978-3-642-33290-6_40}, year = 2012 } @article{Alonso2009273, author = {Alonso, S. and Cabrerizo, F. J. and Herrera-Viedma, E. and Herrera, F.}, doi = {10.1016/j.joi.2009.04.001}, interhash = {cbf95718465346edecef397149e4cf51}, intrahash = {859c208f329fa96e26e35f1bcb7ab65d}, issn = {1751-1577}, journal = {Journal of Informetrics}, number = 4, pages = {273--289}, title = {{h-Index}: A review focused in its variants, computation and standardization for different scientific fields}, url = {http://www.sciencedirect.com/science/article/pii/S1751157709000339}, volume = 3, year = 2009 } @inproceedings{conf/pkdd/BalasubramanyanDC13, author = {Balasubramanyan, Ramnath and Dalvi, Bhavana Bharat and Cohen, William W.}, booktitle = {ECML/PKDD (2)}, crossref = {conf/pkdd/2013-2}, editor = {Blockeel, Hendrik and Kersting, Kristian and Nijssen, Siegfried and Zelezn{\'y}, Filip}, ee = {http://dx.doi.org/10.1007/978-3-642-40991-2_40}, interhash = {9a32b7cc059a500ea302d0aa65036682}, intrahash = {e56623d21a1b7bcb442cd15fe098bb70}, isbn = {978-3-642-40990-5}, pages = {628--642}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {From Topic Models to Semi-supervised Learning: Biasing Mixed-Membership Models to Exploit Topic-Indicative 
Features in Entity Clustering.}, url = {http://dblp.uni-trier.de/db/conf/pkdd/pkdd2013-2.html#BalasubramanyanDC13}, volume = 8189, year = 2013 } @inproceedings{Ramage:2009:LLS:1699510.1699543, abstract = {A significant portion of the world's text is tagged by readers on social bookmarking websites. Credit attribution is an inherent problem in these corpora because most pages have multiple tags, but the tags do not always apply with equal specificity across the whole document. Solving the credit attribution problem requires associating each word in a document with the most appropriate tags and vice versa. This paper introduces Labeled LDA, a topic model that constrains Latent Dirichlet Allocation by defining a one-to-one correspondence between LDA's latent topics and user tags. This allows Labeled LDA to directly learn word-tag correspondences. We demonstrate Labeled LDA's improved expressiveness over traditional LDA with visualizations of a corpus of tagged web pages from del.icio.us. Labeled LDA outperforms SVMs by more than 3 to 1 when extracting tag-specific document snippets. 
As a multi-label text classifier, our model is competitive with a discriminative baseline on a variety of datasets.}, acmid = {1699543}, address = {Stroudsburg, PA, USA}, author = {Ramage, Daniel and Hall, David and Nallapati, Ramesh and Manning, Christopher D.}, booktitle = {Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing: Volume 1 - Volume 1}, interhash = {45315f4da7b10debdca560506cf0d7ba}, intrahash = {6e7173f084e26bca9a8d2a1ab4a5b709}, isbn = {978-1-932432-59-6}, location = {Singapore}, numpages = {9}, pages = {248--256}, publisher = {Association for Computational Linguistics}, series = {EMNLP '09}, title = {Labeled LDA: A Supervised Topic Model for Credit Attribution in Multi-labeled Corpora}, url = {http://dl.acm.org/citation.cfm?id=1699510.1699543}, year = 2009 } @article{thurau2012descriptive, abstract = {Climate change, the global energy footprint, and strategies for sustainable development have become topics of considerable political and public interest. The public debate is informed by an exponentially growing amount of data and there are diverse partisan interest when it comes to interpretation. We therefore believe that data analysis methods are called for that provide results which are intuitively understandable even to non-experts. Moreover, such methods should be efficient so that non-experts users can perform their own analysis at low expense in order to understand the effects of different parameters and influential factors. In this paper, we discuss a new technique for factorizing data matrices that meets both these requirements. The basic idea is to represent a set of data by means of convex combinations of extreme data points. This often accommodates human cognition. In contrast to established factorization methods, the approach presented in this paper can also determine over-complete bases. At the same time, convex combinations allow for highly efficient matrix factorization. 
Based on techniques adopted from the field of distance geometry, we derive a linear time algorithm to determine suitable basis vectors for factorization. By means of the example of several environmental and developmental data sets we discuss the performance and characteristics of the proposed approach and validate that significant efficiency gains are obtainable without performance decreases compared to existing convexity constrained approaches.}, affiliation = {Fraunhofer Institute for Intelligent Analysis and Information Systems IAIS, Sankt Augustin, Germany}, author = {Thurau, Christian and Kersting, Kristian and Wahabzada, Mirwaes and Bauckhage, Christian}, doi = {10.1007/s10618-011-0216-z}, interhash = {457c57f054fea45dcbc8447263591d97}, intrahash = {387f4e1711d7065bd5a94455aeae1957}, issn = {1384-5810}, journal = {Data Mining and Knowledge Discovery}, keyword = {Computer Science}, number = 2, pages = {325--354}, publisher = {Springer Netherlands}, title = {Descriptive matrix factorization for sustainability: Adopting the principle of opposites}, url = {http://dx.doi.org/10.1007/s10618-011-0216-z}, volume = 24, year = 2012 } @article{kataria2011context, abstract = {In a document network such as a citation network of scientific documents, web-logs etc., the content produced by authors exhibit their interest in certain topics. In addition some authors influence other authors' interests. In this work, we propose to model the influence of cited authors along with the interests of citing authors. Moreover, we hypothesize that citations present in documents, the context surrounding the citation mention provides extra topical information about the cited authors. However, associating terms in the context to the cited authors remains an open problem. We propose novel document generation schemes that incorporate the context while simultaneously modeling the interests of citing authors and influence of the cited authors. 
Our experiments show significant improvements over baseline models for various evaluation criteria such as link prediction between document and cited author, and quantitatively explaining unseen text.}, author = {Kataria, Saurabh and Mitra, Prasenjit and Caragea, Cornelia and Giles, C.}, conference = {International Joint Conference on Artificial Intelligence}, interhash = {7496b4df1335fbc6aea691cecb65289d}, intrahash = {dc774d17ec721be6d32530d265f34539}, title = {Context Sensitive Topic Models for Author Influence in Document Networks}, url = {https://www.aaai.org/ocs/index.php/IJCAI/IJCAI11/paper/view/3140}, year = 2011 } @inproceedings{mitchell2015, author = {Mitchell, T. and Cohen, W. and Hruschka, E. and Talukdar, P. and Betteridge, J. and Carlson, A. and Dalvi, B. and Gardner, M. and Kisiel, B. and Krishnamurthy, J. and Lao, N. and Mazaitis, K. and Mohammad, T. and Nakashole, N. and Platanios, E. and Ritter, A. and Samadi, M. and Settles, B. and Wang, R. and Wijaya, D. and Gupta, A. and Chen, X. and Saparov, A. and Greaves, M. and Welling, J.}, booktitle = {AAAI}, interhash = {52d0d71f6f5b332dabc1412f18e3a93d}, intrahash = {63070703e6bb812852cca56574aed093}, note = {Never-Ending Learning in AAAI-2015}, title = {Never-Ending Learning}, url = {http://www.cs.cmu.edu/~wcohen/pubs.html}, year = 2015 } @inproceedings{Kumar:2015:IS:2684822.2685310, abstract = {We consider the problem of inferring choices made by users based only on aggregate data containing the relative popularity of each item. We propose a framework that models the problem as that of inferring a Markov chain given a stationary distribution. Formally, we are given a graph and a target steady-state distribution on its nodes. We are also given a mapping from per-node scores to a transition matrix, from a broad family of such mappings. The goal is to set the scores of each node such that the resulting transition matrix induces the desired steady state. 
We prove sufficient conditions under which this problem is feasible and, for the feasible instances, obtain a simple algorithm for a generic version of the problem. This iterative algorithm provably finds the unique solution to this problem and has a polynomial rate of convergence; in practice we find that the algorithm converges after fewer than ten iterations. We then apply this framework to choice problems in online settings and show that our algorithm is able to explain the observed data and predict the user choices much better than other competing baselines across a variety of diverse datasets.}, acmid = {2685310}, address = {New York, NY, USA}, author = {Kumar, Ravi and Tomkins, Andrew and Vassilvitskii, Sergei and Vee, Erik}, booktitle = {Proceedings of the Eighth ACM International Conference on Web Search and Data Mining}, doi = {10.1145/2684822.2685310}, interhash = {15326871c92155e46259db7cb455d584}, intrahash = {e0e10a01d0f65da00f5390482407abd2}, isbn = {978-1-4503-3317-7}, location = {Shanghai, China}, numpages = {10}, pages = {359--368}, publisher = {ACM}, series = {WSDM '15}, title = {Inverting a Steady-State}, url = {http://doi.acm.org/10.1145/2684822.2685310}, year = 2015 } @article{noKey, abstract = {The extensive literature documenting the ecological effects of roads has repeatedly implicated noise as one of the causal factors. Recent studies of wildlife responses to noise have decisively identified changes in animal behaviors and spatial distributions that are caused by noise. Collectively, this research suggests that spatial extent and intensity of potential noise impacts to wildlife can be studied by mapping noise sources and modeling the propagation of noise across landscapes. Here we present models of energy extraction, aircraft overflight and roadway noise as examples of spatially extensive sources and to present tools available for landscape scale investigations. 
We focus these efforts in US National Parks (Mesa Verde, Grand Teton and Glacier) to highlight that ecological noise pollution is not a threat restricted to developed areas and that many protected natural areas experience significant noise loads. As a heuristic tool for understanding past and future noise pollution we forecast community noise utilizing a spatially-explicit land-use change model that depicts the intensity of human development at sub-county resolution. For road noise, we transform effect distances from two studies into sound levels to begin a discussion of noise thresholds for wildlife. The spatial scale of noise exposure is far larger than any protected area, and no site in the continental US is free from noise. The design of observational and experimental studies of noise effects should be informed by knowledge of regional noise exposure patterns.}, author = {Barber, Jesse R. and Burdett, Chris L. and Reed, Sarah E. and Warner, Katy A. and Formichella, Charlotte and Crooks, Kevin R. and Theobald, Dave M. and Fristrup, Kurt M.}, doi = {10.1007/s10980-011-9646-7}, interhash = {ebd2433210dffb7fecae1dcf14b4fa6b}, intrahash = {17c859ff5dba77ef46cb7677f5221519}, issn = {0921-2973}, journal = {Landscape Ecology}, language = {English}, number = 9, pages = {1281--1295}, publisher = {Springer Netherlands}, title = {Anthropogenic noise exposure in protected natural areas: estimating the scale of ecological consequences}, url = {http://dx.doi.org/10.1007/s10980-011-9646-7}, volume = 26, year = 2011 } @article{SSQU:SSQU478, abstract = {Objective. This study is an effort to produce a more systematic, empirically-based, historical-comparative understanding of media bias than generally is found in previous works. Methods. The research employs a quantitative measure of ideological bias in a formal content analysis of the United States' two largest circulation news magazines, Time and Newsweek. 
Findings are compared with the results of an identical examination of two of the nation's leading partisan journals, the conservative National Review and the liberal Progressive. Results. Bias scores reveal stark differences between the mainstream and the partisan news magazines' coverage of four issue areas: crime, the environment, gender, and poverty. Conclusion. Data provide little support for those claiming significant media bias in either ideological direction.}, author = {Covert, Tawnya J. Adkins and Wasburn, Philo C.}, doi = {10.1111/j.1540-6237.2007.00478.x}, interhash = {9276222b3b8684048db1e42c3a9f3409}, intrahash = {81474f00e1605d45462e23f743dc88bb}, issn = {1540-6237}, journal = {Social Science Quarterly}, number = 3, pages = {690--706}, publisher = {Blackwell Publishing Inc}, title = {Measuring Media Bias: A Content Analysis of Time and Newsweek Coverage of Domestic Social Issues, 1975--2000}, url = {http://dx.doi.org/10.1111/j.1540-6237.2007.00478.x}, volume = 88, year = 2007 } @inproceedings{noauthororeditor, author = {Mirowski, Piotr and Ranzato, Marc'Aurelio and LeCun, Yann}, booktitle = {Proceedings of the NIPS 2010 Workshop on Deep Learning}, interhash = {b7ce347e904a4ca3263cf6cc1e2253bd}, intrahash = {fc3e0e3af595f9a46df6bc9233df836f}, title = {Dynamic Auto-Encoders for Semantic Indexing}, url = {http://yann.lecun.com/exdb/publis/pdf/mirowski-nipsdl-10.pdf}, year = 2010 } @article{grimmer2013text, author = {Grimmer, Justin and Stewart, Brandon M}, interhash = {eb68e01ef4168a398d79f408042fe529}, intrahash = {76001ebc726700bef81886d2e285b7cf}, journal = {Political Analysis}, pages = {mps028}, publisher = {SPM-PMSAPSA}, title = {Text as data: The promise and pitfalls of automatic content analysis methods for political texts}, year = 2013 } @inproceedings{conf/conll/LevyG14, author = {Levy, Omer and Goldberg, Yoav}, booktitle = {CoNLL}, crossref = {conf/conll/2014}, editor = {Morante, Roser and Yih, Wen-tau}, ee = 
{http://aclweb.org/anthology/W/W14/W14-1618.pdf}, interhash = {680dde1fd83a8dd0d6b2619a8266516e}, intrahash = {23bb00b6abab97ed93e74f3b5b148630}, isbn = {978-1-941643-02-0}, pages = {171--180}, publisher = {ACL}, title = {Linguistic Regularities in Sparse and Explicit Word Representations}, url = {http://dblp.uni-trier.de/db/conf/conll/conll2014.html#LevyG14}, year = 2014 } @inproceedings{conf/icdm/DuBJ10, author = {Du, Lan and Buntine, Wray Lindsay and Jin, Huidong}, booktitle = {ICDM}, crossref = {conf/icdm/2010}, editor = {Webb, Geoffrey I. and Liu, Bing and Zhang, Chengqi and Gunopulos, Dimitrios and Wu, Xindong}, ee = {http://doi.ieeecomputersociety.org/10.1109/ICDM.2010.51}, interhash = {dcde7dbdd419330aabb01d151e23c45c}, intrahash = {5a639efaf1e8fea6b0f309333efd7bee}, isbn = {978-0-7695-4256-0}, pages = {148--157}, publisher = {IEEE Computer Society}, title = {Sequential Latent Dirichlet Allocation: Discover Underlying Topic Structures within a Document}, url = {http://dblp.uni-trier.de/db/conf/icdm/icdm2010.html#DuBJ10}, year = 2010 } @inproceedings{DBLP:conf/dsaa/KrompassNT14, author = {Krompass, Denis and Nickel, Maximilian and Tresp, Volker}, bibsource = {dblp computer science bibliography, http://dblp.org}, booktitle = {International Conference on Data Science and Advanced Analytics, {DSAA} 2014, Shanghai, China, October 30 - November 1, 2014}, crossref = {DBLP:conf/dsaa/2014}, doi = {10.1109/DSAA.2014.7058046}, interhash = {0ca986606c22ca0b3780c9b9c25f31c7}, intrahash = {c952ed96ece470e4fa5336eedf670d5b}, isbn = {978-1-4799-6991-3}, pages = {18--24}, publisher = {{IEEE}}, title = {Large-scale factorization of type-constrained multi-relational data}, url = {http://dx.doi.org/10.1109/DSAA.2014.7058046}, year = 2014 } @inproceedings{tran2015semantic, abstract = {In this paper we study the problem of semantic annotation for a trending hashtag which is the crucial step towards analyzing user behavior in social media, yet has been largely unexplored. 
We tackle the problem via linking to entities from Wikipedia. We incorporate the social aspects of trending hashtags by identifying prominent entities for the annotation so as to maximize the information spreading in entity networks. We exploit temporal dynamics of entities in Wikipedia, namely Wikipedia edits and page views to improve the annotation quality. Our experiments show that we significantly outperform the established methods in tweet annotation.}, author = {Tran, Tuan and Tran, Nam-Khanh and Teka Hadgu, Asmelash and Jäschke, Robert}, booktitle = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP)}, interhash = {4156275c801376fa64dfdb69a4ce60c4}, intrahash = {9d4cd9070922e1eb43bcab1da4a9d840}, month = sep, publisher = {Association for Computational Linguistics}, title = {Semantic Annotation for Microblog Topics Using Wikipedia Temporal Information}, year = 2015 } @article{mnih2015humanlevel, author = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A. and Veness, Joel and Bellemare, Marc G. and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K. and Ostrovski, Georg and Petersen, Stig and Beattie, Charles and Sadik, Amir and Antonoglou, Ioannis and King, Helen and Kumaran, Dharshan and Wierstra, Daan and Legg, Shane and Hassabis, Demis}, interhash = {eac59980357d99db87b341b61ef6645f}, intrahash = {fb15f4471c81dc2b9edf2304cb2f7083}, issn = {00280836}, journal = {Nature}, month = feb, number = 7540, pages = {529--533}, publisher = {Nature Publishing Group, a division of Macmillan Publishers Limited. 
All Rights Reserved.}, title = {Human-level control through deep reinforcement learning}, url = {http://dx.doi.org/10.1038/nature14236}, volume = 518, year = 2015 } @inproceedings{conf/www/SinhaSSMEHW15, author = {Sinha, Arnab and Shen, Zhihong and Song, Yang and Ma, Hao and Eide, Darrin and Hsu, Bo-June Paul and Wang, Kuansan}, booktitle = {WWW (Companion Volume)}, crossref = {conf/www/2015c}, editor = {Gangemi, Aldo and Leonardi, Stefano and Panconesi, Alessandro}, ee = {http://doi.acm.org/10.1145/2740908.2742839}, interhash = {6d71a6eb1d070023f6fb75a5f1019a21}, intrahash = {e6066395c31b2f3de9fb836dbac5723a}, isbn = {978-1-4503-3473-0}, pages = {243--246}, publisher = {ACM}, title = {An Overview of Microsoft Academic Service ({MAS}) and Applications}, url = {http://dblp.uni-trier.de/db/conf/www/www2015c.html#SinhaSSMEHW15}, year = 2015 } @inproceedings{conf/wsdm/KohlschutterFN10, author = {Kohlschütter, Christian and Fankhauser, Peter and Nejdl, Wolfgang}, booktitle = {Proc. of 3rd ACM International Conference on Web Search and Data Mining New York City, NY USA (WSDM 2010).}, interhash = {25ea118166ef2f0d5597ca90fa702c9d}, intrahash = {dbc8464d9a298afa49d607d65f2160e2}, title = {Boilerplate Detection using Shallow Text Features}, year = 2010 }