% Venue string normalised: acronym protected, location separated, trailing period dropped.
@inproceedings{conf/wsdm/KohlschutterFN10,
  author = {Kohlschütter, Christian and Fankhauser, Peter and Nejdl, Wolfgang},
  booktitle = {Proceedings of the 3rd {ACM} International Conference on Web Search and Data Mining ({WSDM} 2010), New York City, NY, USA},
  interhash = {25ea118166ef2f0d5597ca90fa702c9d},
  intrahash = {dbc8464d9a298afa49d607d65f2160e2},
  title = {Boilerplate Detection using Shallow Text Features},
  year = 2010
}

% doi is taken from the resolver link already stored in ee.
@inproceedings{conf/www/SinhaSSMEHW15,
  author = {Sinha, Arnab and Shen, Zhihong and Song, Yang and Ma, Hao and Eide, Darrin and Hsu, Bo-June Paul and Wang, Kuansan},
  booktitle = {WWW (Companion Volume)},
  crossref = {conf/www/2015c},
  doi = {10.1145/2740908.2742839},
  editor = {Gangemi, Aldo and Leonardi, Stefano and Panconesi, Alessandro},
  ee = {http://doi.acm.org/10.1145/2740908.2742839},
  interhash = {6d71a6eb1d070023f6fb75a5f1019a21},
  intrahash = {e6066395c31b2f3de9fb836dbac5723a},
  isbn = {978-1-4503-3473-0},
  pages = {243--246},
  publisher = {ACM},
  title = {An Overview of {Microsoft Academic Service} ({MAS}) and Applications},
  url = {http://dblp.uni-trier.de/db/conf/www/www2015c.html#SinhaSSMEHW15},
  year = 2015
}

@article{10.1371/journal.pone.0136763,
  abstract = {

The issue of sustainability is at the top of the political and societal agenda, being considered of extreme importance and urgency. Human individual action impacts the environment both locally (e.g., local air/water quality, noise disturbance) and globally (e.g., climate change, resource use). Urban environments represent a crucial example, with an increasing realization that the most effective way of producing a change is involving the citizens themselves in monitoring campaigns (a citizen science bottom-up approach). This is possible by developing novel technologies and IT infrastructures enabling large citizen participation. Here, in the wider framework of one of the first such projects, we show results from an international competition where citizens were involved in mobile air pollution monitoring using low cost sensing devices, combined with a web-based game to monitor perceived levels of pollution. Measures of shift in perceptions over the course of the campaign are provided, together with insights into participatory patterns emerging from this study. Interesting effects related to inertia and to direct involvement in measurement activities rather than indirect information exposure are also highlighted, indicating that direct involvement can enhance learning and environmental awareness. In the future, this could result in better adoption of policies towards decreasing pollution.

},
  author = {Sîrbu, Alina and Becker, Martin and Caminiti, Saverio and De Baets, Bernard and Elen, Bart and Francis, Louise and Gravino, Pietro and Hotho, Andreas and Ingarra, Stefano and Loreto, Vittorio and Molino, Andrea and Mueller, Juergen and Peters, Jan and Ricchiuti, Ferdinando and Saracino, Fabio and Servedio, Vito D. P. and Stumme, Gerd and Theunis, Jan and Tria, Francesca and {Van den Bossche}, Joris},
  doi = {10.1371/journal.pone.0136763},
  interhash = {6abb09b5ac2137e557a84d7be10009b4},
  intrahash = {f35761dd0fbd9ad8af7c8099e0b6aac4},
  journal = {PLoS ONE},
  month = aug,
  number = 8,
  pages = {e0136763},
  publisher = {Public Library of Science},
  title = {Participatory Patterns in an International Air Quality Monitoring Initiative},
  url = {http://dx.doi.org/10.1371/journal.pone.0136763},
  volume = 10,
  year = 2015
}

% doi is taken from the dx.doi.org link already stored in url.
@article{mnih2015humanlevel,
  author = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A. and Veness, Joel and Bellemare, Marc G. and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K. and Ostrovski, Georg and Petersen, Stig and Beattie, Charles and Sadik, Amir and Antonoglou, Ioannis and King, Helen and Kumaran, Dharshan and Wierstra, Daan and Legg, Shane and Hassabis, Demis},
  doi = {10.1038/nature14236},
  interhash = {eac59980357d99db87b341b61ef6645f},
  intrahash = {fb15f4471c81dc2b9edf2304cb2f7083},
  issn = {0028-0836},
  journal = {Nature},
  month = feb,
  number = 7540,
  pages = {529--533},
  publisher = {Nature Publishing Group},
  title = {Human-level control through deep reinforcement learning},
  url = {http://dx.doi.org/10.1038/nature14236},
  volume = 518,
  year = 2015
}

@inproceedings{tran2015semantic,
  abstract = {In this paper we study the problem of semantic annotation for a trending hashtag which is the crucial step towards analyzing user behavior in social media, yet has been largely unexplored. We tackle the problem via linking to entities from Wikipedia.
We incorporate the social aspects of trending hashtags by identifying prominent entities for the annotation so as to maximize the information spreading in entity networks. We exploit temporal dynamics of entities in Wikipedia, namely Wikipedia edits and page views to improve the annotation quality. Our experiments show that we significantly outperform the established methods in tweet annotation.},
  author = {Tran, Tuan and Tran, Nam-Khanh and Teka Hadgu, Asmelash and Jäschke, Robert},
  booktitle = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  interhash = {4156275c801376fa64dfdb69a4ce60c4},
  intrahash = {9d4cd9070922e1eb43bcab1da4a9d840},
  month = sep,
  publisher = {Association for Computational Linguistics},
  title = {Semantic Annotation for Microblog Topics Using {Wikipedia} Temporal Information},
  year = 2015
}

@article{kluegl2013exploiting,
  abstract = {Conditional Random Fields (CRF) are popular methods for labeling unstructured or textual data. Like many machine learning approaches, these undirected graphical models assume the instances to be independently distributed. However, in real-world applications data is grouped in a natural way, e.g., by its creation context. The instances in each group often share additional structural consistencies. This paper proposes a domain-independent method for exploiting these consistencies by combining two CRFs in a stacked learning framework. We apply rule learning collectively on the predictions of an initial CRF for one context to acquire descriptions of its specific properties. Then, we utilize these descriptions as dynamic and high quality features in an additional (stacked) CRF.
The presented approach is evaluated with a real-world dataset for the segmentation of references and achieves a significant reduction of the labeling error.},
  author = {Kluegl, Peter and Toepfer, Martin and Lemmerich, Florian and Hotho, Andreas and Puppe, Frank},
  interhash = {9ef3f543e4cc9e2b0ef078595f92013b},
  intrahash = {fbaab25e96dd20d96ece9d7fefdc3b4f},
  journal = {Mathematical Methodologies in Pattern Recognition and Machine Learning Springer Proceedings in Mathematics \& Statistics},
  pages = {111--125},
  title = {Exploiting Structural Consistencies with Stacked Conditional Random Fields},
  volume = 30,
  year = 2013
}

% NOTE(review): the journal value above reads like a book title followed by a series name; entry type may need revisiting.
@misc{becker2014subjective,
  abstract = {Sensor data is objective. But when measuring our environment, measured values are contrasted with our perception, which is always subjective. This makes interpreting sensor measurements difficult for a single person in her personal environment. In this context, the EveryAware projects directly connects the concepts of objective sensor data with subjective impressions and perceptions by providing a collective sensing platform with several client applications allowing to explicitly associate those two data types. The goal is to provide the user with personalized feedback, a characterization of the global as well as her personal environment, and enable her to position her perceptions in this global context. In this poster we summarize the collected data of two EveryAware applications, namely WideNoise for noise measurements and AirProbe for participatory air quality sensing. Basic insights are presented including user activity, learning processes and sensor data to perception correlations. These results provide an outlook on how this data can further be used to understand the connection between sensor data and perceptions.
},
  author = {Becker, Martin and Hotho, Andreas and Mueller, Juergen and Kibanov, Mark and Atzmueller, Martin and Stumme, Gerd},
  howpublished = {CSSWS 2014, Poster},
  interhash = {615afda9869c5e0facc8bdb5534760aa},
  intrahash = {33cf40cc46170f51767c46d2ec14a495},
  title = {Subjective vs. Objective Data: Bridging the Gap},
  url = {http://www.gesis.org/en/events/css-wintersymposium/poster-presentation/},
  year = 2014
}

@inproceedings{vkistowski2015modeling,
  abstract = {Today’s system developers and operators face the challenge of creating software systems that make efficient use of dynamically allocated resources under highly variable and dynamic load profiles, while at the same time delivering reliable performance. Benchmarking of systems under these constraints is difficult, as state-of-the-art benchmarking frameworks provide only limited support for emulating such dynamic and highly variable load profiles for the creation of realistic workload scenarios. Industrial benchmarks typically confine themselves to workloads with constant or stepwise increasing loads. Alternatively, they support replaying of recorded load traces. Statistical load intensity descriptions also do not sufficiently capture concrete pattern load profile variations over time. To address these issues, we present the Descartes Load Intensity Model (DLIM). DLIM provides a modeling formalism for describing load intensity variations over time. A DLIM instance can be used as a compact representation of a recorded load intensity trace, providing a powerful tool for benchmarking and performance analysis. As manually obtaining DLIM instances can be time consuming, we present three different automated extraction methods, which also help to enable autonomous system analysis for self-adaptive systems. Model expressiveness is validated using the presented extraction methods.
Extracted DLIM instances exhibit a median modeling error of 12.4\% on average over nine different real-world traces covering between two weeks and seven months. Additionally, extraction methods perform orders of magnitude faster than existing time series decomposition approaches.},
  author = {von Kistowski, Jóakim and Herbst, Nikolas and Zoller, Daniel and Kounev, Samuel and Hotho, Andreas},
  booktitle = {Proceedings of the 10th International Symposium on Software Engineering for Adaptive and Self-Managing Systems (SEAMS)},
  interhash = {9f0be929d7bcc057c778f6b44e73cf4c},
  intrahash = {f449d3cf35941636f96d72aaf620a275},
  title = {Modeling and Extracting Load Intensity Profiles},
  year = 2015
}

@inproceedings{zoller2015publication,
  abstract = {Scholarly success is traditionally measured in terms of citations to publications. With the advent of publication management and digital libraries on the web, scholarly usage data has become a target of investigation and new impact metrics computed on such usage data have been proposed – so called altmetrics. In scholarly social bookmarking systems, scientists collect and manage publication meta data and thus reveal their interest in these publications.
In this work, we investigate connections between usage metrics and citations, and find posts, exports, and page views of publications to be correlated to citations.},
  author = {Zoller, Daniel and Doerfel, Stephan and Jäschke, Robert and Stumme, Gerd and Hotho, Andreas},
  booktitle = {Proceedings of the 2015 ACM Conference on Web Science},
  interhash = {3515b34cd19959cee5fafbf4467a75ed},
  intrahash = {548a7010ee2726f28e04e5c6e5fd6e2d},
  title = {On Publication Usage in a Social Bookmarking System},
  year = 2015
}

@incollection{singer2014folksonomies,
  author = {Singer, Philipp and Niebler, Thomas and Hotho, Andreas and Strohmaier, Markus},
  booktitle = {Encyclopedia of Social Network Analysis and Mining},
  interhash = {3a55606e91328ca0191127b1fafe189e},
  intrahash = {84d9498b73de976d8d550c6761d4be0d},
  pages = {542--547},
  publisher = {Springer},
  title = {Folksonomies},
  year = 2014
}

% The venue was stored in the editor field; it is a conference name, so it now lives in booktitle.
@inproceedings{ring2015condist,
  author = {Ring, Markus and Otto, Florian and Becker, Martin and Niebler, Thomas and Landes, Dieter and Hotho, Andreas},
  booktitle = {ECML PKDD 2015},
  interhash = {c062a57a17a0910d6c27ecd664502ac1},
  intrahash = {a2f9d649f2856677e4d886a3b517404d},
  title = {{ConDist}: A Context-Driven Categorical Distance Measure},
  year = 2015
}

% Conference dates were embedded in address; they are now carried by month and eventdate.
@inproceedings{dallmann2015media,
  address = {Cyprus, Turkey},
  author = {Dallmann, Alexander and Lemmerich, Florian and Zoller, Daniel and Hotho, Andreas},
  booktitle = {26th ACM Conference on Hypertext and Social Media},
  eventdate = {2015-09-01/2015-09-04},
  interhash = {6b2daa7830c5e504543dcdaefed46285},
  intrahash = {addfd0d84b4347392dc94a4bec400412},
  month = sep,
  publisher = {ACM},
  title = {Media Bias in {German} Online Newspapers},
  year = 2015
}

@article{journals/expert/RehakPGSBC09,
  author = {Rehák, Martin and Pechoucek, Michal and Grill, Martin and Stiborek, Jan and Bartos, Karel and Celeda, Pavel},
  doi = {10.1109/MIS.2009.42},
  ee = {http://doi.ieeecomputersociety.org/10.1109/MIS.2009.42},
  interhash = {878f9ec500bf1b485f337afe0abe1801},
  intrahash = {502b8b47f7e3ee930f2d79bde0b29d76},
  journal = {IEEE Intelligent
Systems},
  number = 3,
  pages = {16--25},
  title = {Adaptive Multiagent System for Network Traffic Monitoring},
  url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.149.3921&rep=rep1&type=pdf},
  volume = 24,
  year = 2009
}

% doi is taken from the dx.doi.org link already stored in ee.
@article{journals/nle/ZeschG10,
  author = {Zesch, Torsten and Gurevych, Iryna},
  doi = {10.1017/S1351324909990167},
  ee = {http://dx.doi.org/10.1017/S1351324909990167},
  interhash = {3300b5457187d0f6c551e63ecb27336c},
  intrahash = {76a512bbba5ba8ec5819d469d4611d81},
  journal = {Natural Language Engineering},
  number = 1,
  pages = {25--59},
  title = {Wisdom of crowds versus wisdom of linguists -- measuring the semantic relatedness of words},
  url = {http://dblp.uni-trier.de/db/journals/nle/nle16.html#ZeschG10},
  volume = 16,
  year = 2010
}

@inproceedings{DBLP:conf/dsaa/KrompassNT14,
  author = {Krompass, Denis and Nickel, Maximilian and Tresp, Volker},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  booktitle = {International Conference on Data Science and Advanced Analytics, {DSAA} 2014, Shanghai, China, October 30 - November 1, 2014},
  crossref = {DBLP:conf/dsaa/2014},
  doi = {10.1109/DSAA.2014.7058046},
  interhash = {0ca986606c22ca0b3780c9b9c25f31c7},
  intrahash = {c952ed96ece470e4fa5336eedf670d5b},
  isbn = {978-1-4799-6991-3},
  pages = {18--24},
  publisher = {{IEEE}},
  title = {Large-scale factorization of type-constrained multi-relational data},
  url = {http://dx.doi.org/10.1109/DSAA.2014.7058046},
  year = 2014
}

@inproceedings{singer2015hyptrails,
  address = {Firenze, Italy},
  author = {Singer, P. and Helic, D. and Hotho, A.
and Strohmaier, M.},
  booktitle = {24th International World Wide Web Conference (WWW2015)},
  eventdate = {2015-05-18/2015-05-22},
  interhash = {d33e150aa37dcd618388960286f8a46a},
  intrahash = {5d21e53dc91b35a4a6cb6b9ec858045d},
  month = may,
  organization = {ACM},
  publisher = {ACM},
  title = {{HypTrails}: A {Bayesian} approach for comparing hypotheses about human trails},
  url = {http://www.www2015.it/documents/proceedings/proceedings/p1003.pdf},
  year = 2015
}

% Editor list: the DBLP homonym number 0001 had been parsed as a surname; the editor is Bing Liu.
@inproceedings{conf/icdm/DuBJ10,
  author = {Du, Lan and Buntine, Wray Lindsay and Jin, Huidong},
  booktitle = {ICDM},
  crossref = {conf/icdm/2010},
  doi = {10.1109/ICDM.2010.51},
  editor = {Webb, Geoffrey I. and Liu, Bing and Zhang, Chengqi and Gunopulos, Dimitrios and Wu, Xindong},
  ee = {http://doi.ieeecomputersociety.org/10.1109/ICDM.2010.51},
  interhash = {dcde7dbdd419330aabb01d151e23c45c},
  intrahash = {5a639efaf1e8fea6b0f309333efd7bee},
  isbn = {978-0-7695-4256-0},
  pages = {148--157},
  publisher = {IEEE Computer Society},
  title = {Sequential Latent {Dirichlet} Allocation: Discover Underlying Topic Structures within a Document},
  url = {http://dblp.uni-trier.de/db/conf/icdm/icdm2010.html#DuBJ10},
  year = 2010
}

% Editor list: the hyphenated given name Wen-tau had been split so that tau became a name particle.
@inproceedings{conf/conll/LevyG14,
  author = {Levy, Omer and Goldberg, Yoav},
  booktitle = {CoNLL},
  crossref = {conf/conll/2014},
  editor = {Morante, Roser and Yih, Wen-tau},
  ee = {http://aclweb.org/anthology/W/W14/W14-1618.pdf},
  interhash = {680dde1fd83a8dd0d6b2619a8266516e},
  intrahash = {23bb00b6abab97ed93e74f3b5b148630},
  isbn = {978-1-941643-02-0},
  pages = {171--180},
  publisher = {ACL},
  title = {Linguistic Regularities in Sparse and Explicit Word Representations},
  url = {http://dblp.uni-trier.de/db/conf/conll/conll2014.html#LevyG14},
  year = 2014
}

% NOTE(review): pages and publisher below look like scraper residue rather than real values; confirm against the journal.
@article{grimmer2013text,
  author = {Grimmer, Justin and Stewart, Brandon M.},
  interhash = {eb68e01ef4168a398d79f408042fe529},
  intrahash = {76001ebc726700bef81886d2e285b7cf},
  journal = {Political Analysis},
  pages = {mps028},
  publisher = {SPM-PMSAPSA},
  title = {Text as data: The promise and pitfalls of automatic content analysis
methods for political texts},
  year = 2013
}

% The citation key looks like a tool-generated placeholder; it is kept so existing citations still resolve.
% The proceedings title had been stored, word-inverted, in the editor field; it now lives in booktitle.
@inproceedings{noauthororeditor,
  author = {Mirowski, Piotr and Ranzato, Marc'Aurelio and LeCun, Yann},
  booktitle = {Proceedings of the NIPS 2010 Workshop on Deep Learning},
  interhash = {b7ce347e904a4ca3263cf6cc1e2253bd},
  intrahash = {fc3e0e3af595f9a46df6bc9233df836f},
  title = {Dynamic Auto-Encoders for Semantic Indexing},
  url = {http://yann.lecun.com/exdb/publis/pdf/mirowski-nipsdl-10.pdf},
  year = 2010
}

@article{SSQU:SSQU478,
  abstract = {Objective. This study is an effort to produce a more systematic, empirically-based, historical-comparative understanding of media bias than generally is found in previous works. Methods. The research employs a quantitative measure of ideological bias in a formal content analysis of the United States' two largest circulation news magazines, Time and Newsweek. Findings are compared with the results of an identical examination of two of the nation's leading partisan journals, the conservative National Review and the liberal Progressive. Results. Bias scores reveal stark differences between the mainstream and the partisan news magazines' coverage of four issue areas: crime, the environment, gender, and poverty. Conclusion. Data provide little support for those claiming significant media bias in either ideological direction.},
  author = {Covert, Tawnya J.
Adkins and Wasburn, Philo C.},
  doi = {10.1111/j.1540-6237.2007.00478.x},
  interhash = {9276222b3b8684048db1e42c3a9f3409},
  intrahash = {81474f00e1605d45462e23f743dc88bb},
  issn = {1540-6237},
  journal = {Social Science Quarterly},
  number = 3,
  pages = {690--706},
  publisher = {Blackwell Publishing Inc},
  title = {Measuring Media Bias: A Content Analysis of {Time} and {Newsweek} Coverage of Domestic Social Issues, 1975--2000},
  url = {http://dx.doi.org/10.1111/j.1540-6237.2007.00478.x},
  volume = 88,
  year = 2007
}

@inproceedings{HSWHFT2014,
  address = {Budapest, Hungary},
  author = {Hasenfratz, David and Saukh, Olga and Walser, Christoph and Hueglin, Christoph and Fierz, Martin and Thiele, Lothar},
  booktitle = {Proceedings of the 12th International Conference on Pervasive Computing and Communications (PerCom 2014)},
  interhash = {9a418e24cae67e23b62ca4b93f95d018},
  intrahash = {72cae5af9853823b4d63e0a137aa0fe0},
  month = mar,
  pages = {69--77},
  title = {Pushing the spatio-temporal resolution limit of urban air pollution maps},
  year = 2014
}

@article{1621452,
  abstract = {This paper considers how we feel about the content we see or hear. As opposed to the cognitive content information composed of the facts about the genre, temporal content structures and spatiotemporal content elements, we are interested in obtaining the information about the feelings, emotions, and moods evoked by a speech, audio, or video clip. We refer to the latter as the affective content, and to the terms such as happy or exciting as the affective labels of an audiovisual signal. In the first part of the paper, we explore the possibilities for representing and modeling the affective content of an audiovisual signal to effectively bridge the affective gap. Without loosing generality, we refer to this signal simply as video, which we see as an image sequence with an accompanying soundtrack.
Then, we show the high potential of the affective video content analysis for enhancing the content recommendation functionalities of the future PVRs and VOD systems. We conclude this paper by outlining some interesting research challenges in the field},
  author = {Hanjalic, A.},
  doi = {10.1109/MSP.2006.1621452},
  interhash = {86afbc088a73b1bdcb2d509f2f41c711},
  intrahash = {ebd87b66c699f7ae166e1224030c6200},
  issn = {1053-5888},
  journal = {IEEE Signal Processing Magazine},
  month = mar,
  number = 2,
  pages = {90--100},
  title = {Extracting moods from pictures and sounds: towards truly personalized {TV}},
  url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1621452},
  volume = 23,
  year = 2006
}

@article{Kulkarni:2009:Biomed-Eng-Online:19656402,
  abstract = {Facial expressions are important in facilitating human communication and interactions. Also, they are used as an important tool in behavioural studies and in medical rehabilitation. Facial image based mood detection techniques may provide a fast and practical approach for non-invasive mood detection. The purpose of the present study was to develop an intelligent system for facial image based expression classification using committee neural networks. Several facial parameters were extracted from a facial image and were used to train several generalized and specialized neural networks. Based on initial testing, the best performing generalized and specialized neural networks were recruited into decision making committees which formed an integrated committee neural network system. The integrated committee neural network system was then evaluated using data obtained from subjects not used in training or in initial testing. The system correctly identified the correct facial expression in 255 of the 282 images (90.43\% of the cases), from 62 subjects not used in training or in initial testing.
Committee neural networks offer a potential tool for image based mood detection.},
  author = {Kulkarni, S S and Reddy, N P and Hariharan, S I},
  doi = {10.1186/1475-925X-8-16},
  interhash = {9bcd872ea86213a2f7d3271b0e6eb7d1},
  intrahash = {14c48c03f40a1c8bdc22314fcdf292bf},
  journal = {Biomed Eng Online},
  pages = {16},
  pmid = {19656402},
  title = {Facial expression (mood) recognition from facial images using committee neural networks},
  url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2731770/},
  volume = 8,
  year = 2009
}

% The citation key looks like a tool-generated placeholder; it is kept so existing citations still resolve.
@article{noKey,
  abstract = {The extensive literature documenting the ecological effects of roads has repeatedly implicated noise as one of the causal factors. Recent studies of wildlife responses to noise have decisively identified changes in animal behaviors and spatial distributions that are caused by noise. Collectively, this research suggests that spatial extent and intensity of potential noise impacts to wildlife can be studied by mapping noise sources and modeling the propagation of noise across landscapes. Here we present models of energy extraction, aircraft overflight and roadway noise as examples of spatially extensive sources and to present tools available for landscape scale investigations. We focus these efforts in US National Parks (Mesa Verde, Grand Teton and Glacier) to highlight that ecological noise pollution is not a threat restricted to developed areas and that many protected natural areas experience significant noise loads. As a heuristic tool for understanding past and future noise pollution we forecast community noise utilizing a spatially-explicit land-use change model that depicts the intensity of human development at sub-county resolution. For road noise, we transform effect distances from two studies into sound levels to begin a discussion of noise thresholds for wildlife. The spatial scale of noise exposure is far larger than any protected area, and no site in the continental US is free form noise.
The design of observational and experimental studies of noise effects should be informed by knowledge of regional noise exposure patterns.},
  author = {Barber, Jesse R. and Burdett, Chris L. and Reed, Sarah E. and Warner, Katy A. and Formichella, Charlotte and Crooks, Kevin R. and Theobald, Dave M. and Fristrup, Kurt M.},
  doi = {10.1007/s10980-011-9646-7},
  interhash = {ebd2433210dffb7fecae1dcf14b4fa6b},
  intrahash = {17c859ff5dba77ef46cb7677f5221519},
  issn = {0921-2973},
  journal = {Landscape Ecology},
  language = {English},
  number = 9,
  pages = {1281--1295},
  publisher = {Springer Netherlands},
  title = {Anthropogenic noise exposure in protected natural areas: estimating the scale of ecological consequences},
  url = {http://dx.doi.org/10.1007/s10980-011-9646-7},
  volume = 26,
  year = 2011
}

@misc{karampatziakis2013discriminative,
  abstract = {Representing examples in a way that is compatible with the underlying classifier can greatly enhance the performance of a learning system. In this paper we investigate scalable techniques for inducing discriminative features by taking advantage of simple second order structure in the data. We focus on multiclass classification and show that features extracted from the generalized eigenvectors of the class conditional second moments lead to classifiers with excellent empirical performance. Moreover, these features have attractive theoretical properties, such as inducing representations that are invariant to linear transformations of the input.
We evaluate classifiers built from these features on three different tasks, obtaining state of the art results.},
  author = {Karampatziakis, Nikos and Mineiro, Paul},
  interhash = {befee5ff60893632b4a38edb54e7c975},
  intrahash = {47512dd90370c769bfd328d8fd8179ef},
  note = {cite arxiv:1310.1934},
  title = {Discriminative Features via Generalized Eigenvectors},
  url = {http://arxiv.org/abs/1310.1934},
  year = 2013
}

% arXiv preprint; the identifier is carried in the note and url fields.
@misc{yu2013largescale,
  abstract = {The multi-label classification problem has generated significant interest in recent years. However, existing approaches do not adequately address two key challenges: (a) the ability to tackle problems with a large number (say millions) of labels, and (b) the ability to handle data with missing labels. In this paper, we directly address both these problems by studying the multi-label problem in a generic empirical risk minimization (ERM) framework. Our framework, despite being simple, is surprisingly able to encompass several recent label-compression based methods which can be derived as special cases of our method. To optimize the ERM problem, we develop techniques that exploit the structure of specific loss functions - such as the squared loss function - to offer efficient algorithms. We further show that our learning framework admits formal excess risk bounds even in the presence of missing labels. Our risk bounds are tight and demonstrate better generalization performance for low-rank promoting trace-norm regularization when compared to (rank insensitive) Frobenius norm regularization.
Finally, we present extensive empirical results on a variety of benchmark datasets and show that our methods perform significantly better than existing label compression based methods and can scale up to very large datasets such as the Wikipedia dataset.},
  author = {Yu, Hsiang-Fu and Jain, Prateek and Kar, Purushottam and Dhillon, Inderjit S.},
  interhash = {1252173520757338468a68e028494647},
  intrahash = {716e5270c1dcb3a1e4eedf9934859021},
  note = {cite arxiv:1307.5101},
  title = {Large-scale Multi-label Learning with Missing Labels},
  url = {http://arxiv.org/abs/1307.5101},
  year = 2013
}

% The editor is an institution, hence the double braces that keep it a single indivisible name.
@book{schnyder1962zrcher,
  address = {Zürich},
  author = {Schnyder, Werner},
  editor = {{Staatsarchiv des Kantons Zürich}},
  interhash = {d976f0bfcc2e2fc29e4906401a99e19e},
  intrahash = {1ac82216499fef17c191008ace685af4},
  title = {Die Zürcher Ratslisten 1225 bis 1798},
  url = {http://opac.regesta-imperii.de/lang_de/kurztitelsuche_r.php?kurztitel=Schnyder%2C+Z%C3%BCrcher+Ratslisten},
  year = 1962
}

@inproceedings{melville2002contentboosted,
  abstract = {Most recommender systems use Collaborative Filtering or Content-based methods to predict new items of interest for a user. While both methods have their own advantages, individually they fail to provide good recommendations in many situations. Incorporating components from both methods, a hybrid recommender system can overcome these shortcomings. In this paper, we present an elegant and effective framework for combining content and collaboration. Our approach uses a content-based predictor to enhance existing user data, and then provides personalized suggestions through collaborative filtering. We present experimental results that show how this approach, Content-Boosted Collaborative Filtering, performs better than a pure content-based predictor, pure collaborative filter, and a naive hybrid approach.},
  acmid = {777124},
  address = {Menlo Park, CA, USA},
  author = {Melville, Prem and Mooney, Raymond J.
and Nagarajan, Ramadass},
  booktitle = {Eighteenth National Conference on Artificial Intelligence},
  interhash = {985028099c1a29f116ad7434005895ac},
  intrahash = {a4917f0299f48e403966a8003ebd50be},
  isbn = {0-262-51129-0},
  location = {Edmonton, Alberta, Canada},
  numpages = {6},
  pages = {187--192},
  publisher = {American Association for Artificial Intelligence},
  title = {Content-boosted Collaborative Filtering for Improved Recommendations},
  url = {http://dl.acm.org/citation.cfm?id=777092.777124},
  year = 2002
}

% Journal article; the remaining fields of this entry follow the abstract.
@article{Anicich01112014,
  abstract = {Interpreting scholarly contributions solely on the basis of the number, and not nature, of citations is inherently flawed because contradictory as well as confirmatory findings feed into the same metric, capturing popularity at the expense of precision. I propose a citation and indexing procedure that would conveniently integrate information about research trends while imposing minimal burden on the producers and consumers of research. Under the proposed system, citations appearing in the reference list of research reports would be superscripted with letters corresponding to one of the following six categories: references to findings that are Consistent with the current findings, are Replicated by the current findings, are Inconsistent with the current findings, Failed to be replicated by the current findings, were used to build Theory, or were used to cite Methodologies. I explain how the resulting CRIF-TM data could be summarized and perpetually updated by an online indexing service. I provide an example to demonstrate how these superscripts could be conveniently and unobtrusively presented in the reference list of forthcoming articles.
Finally, I examine the anticipated benefits, limitations, and implementation challenges of the proposed citation and indexing procedure.},
  author = {Anicich, Eric M.},
  doi = {10.1177/1745691614549772},
  eprint = {http://pps.sagepub.com/content/9/6/682.full.pdf+html},
  interhash = {af5e16af5f2861d1e53f02d8e58cf221},
  intrahash = {ead9a503ae90b7f74d16739d7e813454},
  journal = {Perspectives on Psychological Science},
  number = 6,
  pages = {682--691},
  title = {What Lies Within: Superscripting References to Reveal Research Trends},
  url = {http://pps.sagepub.com/content/9/6/682.abstract},
  volume = 9,
  year = 2014
}

% The note had the arXiv identifier fused with the submission comment; the two are now separated.
@misc{he2014network,
  abstract = {We propose a new method for network reconstruction by the stationary distribution data of Markov chains on this network. Our method has the merits that: the data we need are much few than most method and need not defer to the time order, and we do not need the input data. We define some criterions to measure the efficacy and the simulation results on several networks, including computer-generated networks and real networks, indicate our method works well. The method consist of two procedures, fist, reconstruct degree sequence, second, reconstruct the network(or edges). And we test the efficacy of each procedure.},
  author = {He, Zhe and Xu, Rui-Jie and Wang, Bing-Hong},
  interhash = {af79d943d03de3193b6b9fd5935c5719},
  intrahash = {0d627343f01e79c3427f5a412757e482},
  note = {cite arxiv:1410.4120. Comment: 4 pages, 3 figures},
  title = {Network reconstruction by stationary distribution data of {Markov} chains based on correlation analysis},
  url = {http://arxiv.org/abs/1410.4120},
  year = 2014
}

@misc{blondel2015survey,
  abstract = {In this paper, we review some advances made recently in the study of mobile phone datasets. This area of research has emerged a decade ago, with the increasing availability of large-scale anonymized datasets, and has grown into a stand-alone topic.
We will survey the contributions made so far on the social networks that can be constructed with such data, the study of personal mobility, geographical partitioning, urban planning, and help towards development as well as security and privacy issues.},
  archiveprefix = {arXiv},
  author = {Blondel, Vincent D. and Decuyper, Adeline and Krings, Gautier},
  eprint = {1502.03406},
  interhash = {4386dfbc20b3f9e6a1a5bf113f5cdd1c},
  intrahash = {469e50f40c6091f639cff024f8e90100},
  title = {A survey of results on mobile phone datasets analysis},
  url = {http://arxiv.org/abs/1502.03406},
  year = 2015
}

@inproceedings{Kumar:2015:IS:2684822.2685310,
  abstract = {We consider the problem of inferring choices made by users based only on aggregate data containing the relative popularity of each item. We propose a framework that models the problem as that of inferring a Markov chain given a stationary distribution. Formally, we are given a graph and a target steady-state distribution on its nodes. We are also give a mapping from per-node scores to a transition matrix, from a broad family of such mappings. The goal is to set the scores of each node such that the resulting transition matrix induces the desired steady state. We prove sufficient conditions under which this problem is feasible and, for the feasible instances, obtain a simple algorithm for a generic version of the problem. This iterative algorithm provably finds the unique solution to this problem and has a polynomial rate of convergence; in practice we find that the algorithm converges after fewer than ten iterations.
We then apply this framework to choice problems in online settings and show that our algorithm is able to explain the observed data and predict the user choices much better than other competing baselines across a variety of diverse datasets.},
  acmid = {2685310},
  address = {New York, NY, USA},
  author = {Kumar, Ravi and Tomkins, Andrew and Vassilvitskii, Sergei and Vee, Erik},
  booktitle = {Proceedings of the Eighth ACM International Conference on Web Search and Data Mining},
  doi = {10.1145/2684822.2685310},
  interhash = {15326871c92155e46259db7cb455d584},
  intrahash = {e0e10a01d0f65da00f5390482407abd2},
  isbn = {978-1-4503-3317-7},
  location = {Shanghai, China},
  numpages = {10},
  pages = {359--368},
  publisher = {ACM},
  series = {WSDM '15},
  title = {Inverting a Steady-State},
  url = {http://doi.acm.org/10.1145/2684822.2685310},
  year = 2015
}

@incollection{pol_introduction,
  author = {Lehmann, Jens and Voelker, Johanna},
  booktitle = {Perspectives on Ontology Learning},
  editor = {Lehmann, Jens and Voelker, Johanna},
  interhash = {a53a9f1796f71f2f1c5ec646961f8924},
  intrahash = {cf6a6785f5cab0525632a003c47ef5f7},
  owner = {jl},
  pages = {ix--xvi},
  publisher = {AKA / IOS Press},
  title = {An Introduction to Ontology Learning},
  url = {http://jens-lehmann.org/files/2014/pol_introduction.pdf},
  year = 2014
}

@inproceedings{mitchell2015,
  author = {Mitchell, T. and Cohen, W. and Hruschka, E. and Talukdar, P. and Betteridge, J. and Carlson, A. and Dalvi, B. and Gardner, M. and Kisiel, B. and Krishnamurthy, J. and Lao, N. and Mazaitis, K. and Mohammad, T. and Nakashole, N. and Platanios, E. and Ritter, A. and Samadi, M. and Settles, B. and Wang, R. and Wijaya, D. and Gupta, A. and Chen, X. and Saparov, A. and Greaves, M.
and Welling, J.},
  booktitle = {AAAI},
  interhash = {52d0d71f6f5b332dabc1412f18e3a93d},
  intrahash = {63070703e6bb812852cca56574aed093},
  note = {Never-Ending Learning in AAAI-2015},
  title = {Never-Ending Learning},
  url = {http://www.cs.cmu.edu/~wcohen/pubs.html},
  year = 2015
}

@article{noKey,
  abstract = {Applications of the Social Web are ubiquitous and have become an integral part of everyday life: Users make friends, for example, with the help of online social networks, share thoughts via Twitter, or collaboratively write articles in Wikipedia. All such interactions leave digital traces; thus, users participate in the creation of heterogeneous, distributed, collaborative data collections. In linguistics, the },
  author = {Mitzlaff, Folke and Atzmueller, Martin and Hotho, Andreas and Stumme, Gerd},
  doi = {10.1007/s13278-014-0216-2},
  eid = {216},
  interhash = {7e02f08a123c801c33ac93109394adfb},
  intrahash = {5b268a7c5308af783c3028573ffcd0c0},
  issn = {1869-5450},
  journal = {Social Network Analysis and Mining},
  language = {English},
  number = 1,
  publisher = {Springer Vienna},
  title = {The social distributional hypothesis: a pragmatic proxy for homophily in online social networks},
  url = {http://dx.doi.org/10.1007/s13278-014-0216-2},
  volume = 4,
  year = 2014
}

@misc{singer2014hyptrails,
  abstract = {When users interact with the Web today, they leave sequential digital trails on a massive scale. Examples of such human trails include Web navigation, sequences of online restaurant reviews, or online music play lists. Understanding the factors that drive the production of these trails can be useful for e.g., improving underlying network structures, predicting user clicks or enhancing recommendations. In this work, we present a general approach called HypTrails for comparing a set of hypotheses about human trails on the Web, where hypotheses represent beliefs about transitions between states. Our approach utilizes Markov chain models with Bayesian inference.
The main idea is to incorporate hypotheses as informative Dirichlet priors and to leverage the sensitivity of Bayes factors on the prior for comparing hypotheses with each other. For eliciting Dirichlet priors from hypotheses, we present an adaption of the so-called (trial) roulette method. We demonstrate the general mechanics and applicability of HypTrails by performing experiments with (i) synthetic trails for which we control the mechanisms that have produced them and (ii) empirical trails stemming from different domains including website navigation, business reviews and online music played. Our work expands the repertoire of methods available for studying human trails on the Web.},
  archiveprefix = {arXiv},
  author = {Singer, Philipp and Helic, Denis and Hotho, Andreas and Strohmaier, Markus},
  eprint = {1411.2844},
  interhash = {54535487cdfa9024073c07e336e03d70},
  intrahash = {07a19041ef1bfd5cef707e03d1510d5e},
  title = {HypTrails: A Bayesian Approach for Comparing Hypotheses about Human Trails on the Web},
  url = {http://arxiv.org/abs/1411.2844},
  year = 2014
}

@article{christin2011survey,
  author = {Christin, Delphine and Reinhardt, Andreas and Kanhere, Salil S and Hollick, Matthias},
  interhash = {34a9bd6609a37048345486fce4624b7a},
  intrahash = {8e81d393500ab9d5b575e397c51a4868},
  journal = {Journal of Systems and Software},
  number = 11,
  pages = {1928--1946},
  publisher = {Elsevier},
  title = {A survey on privacy in mobile participatory sensing applications},
  url = {http://scholar.google.de/scholar.bib?q=info:qpMZngbCBHYJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAVJLgOK6PYLcv_X2uOph4-evGd2AOVDax&scisf=4&hl=en&scfhb=1},
  volume = 84,
  year = 2011
}

@incollection{smith2001disambiguating,
  abstract = {Geographic interfaces provide natural, scalable visualizations for many digital library collections, but the wide range of data in digital libraries presents some particular problems for identifying and disambiguating place names.
We describe the toponym-disambiguation system in the Perseus digital library and evaluate its performance. Name categorization varies significantly among different types of documents, but toponym disambiguation performs at a high level of precision and recall with a gazetteer an order of magnitude larger than most other applications.},
  author = {Smith, David A. and Crane, Gregory},
  booktitle = {Research and Advanced Technology for Digital Libraries},
  doi = {10.1007/3-540-44796-2_12},
  editor = {Constantopoulos, Panos and Sølvberg, Ingeborg T.},
  interhash = {82e7065b21f103b222a12703f8c9cf1e},
  intrahash = {97924b0f6a7f9bdc08f4419ce8144d53},
  isbn = {978-3-540-42537-3},
  language = {English},
  pages = {127--136},
  publisher = {Springer Berlin Heidelberg},
  series = {Lecture Notes in Computer Science},
  title = {Disambiguating Geographic Names in a Historical Digital Library},
  url = {http://dx.doi.org/10.1007/3-540-44796-2_12},
  volume = 2163,
  year = 2001
}

@inproceedings{Teufel01task-basedevaluation,
  abstract = {We present a novel method for task-based evaluation of summaries of scientific articles. The task we propose is a question-answering task, where the questions are about the relatedness of the current paper to prior research. This evaluation method is time-efficient with respect to material preparation and data collection, so that it is possible to test against many different baselines, something that is not usually feasible in evaluations by relevance decision. We use this methodology to evaluate the quality of summaries our system produces. These summaries are designed to describe the contribution of a scientific article in relation to other work.
The results show that this type of summary is indeed more useful than the baselines (random sentences, keyword lists and generic author-written summaries), and nearly as useful as the full texts.},
  author = {Teufel, Simone},
  booktitle = {Workshop Automatic Summarization, NAACL},
  interhash = {ed0c6de01aa6b0a3ef369627eb689cf3},
  intrahash = {b95470c8eae5d8f0372d20215c35f236},
  pages = {12--21},
  title = {Task-Based Evaluation of Summary Quality: Describing Relationships between Scientific Papers},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.12.8139},
  year = 2001
}

@proceedings{thierrypoibeau2013multisource,
  abstract = {Information extraction (IE) and text summarization (TS) are powerful technologies for finding relevant pieces of information in text and presenting them to the user in condensed form. The ongoing information explosion makes IE and TS critical for successful functioning within the information society. These technologies face particular challenges due to the inherent multi-source nature of the information explosion. The technologies must now handle not isolated texts or individual narratives, but rather large-scale repositories and streams--in general, in multiple languages--containing a multiplicity of perspectives, opinions, or commentaries on particular topics, entities or events. There is thus a need to adapt existing techniques and develop new ones to deal with these challenges. This volume contains a selection of papers that present a variety of methodologies for content identification and extraction, as well as for content fusion and regeneration. The chapters cover various aspects of the challenges, depending on the nature of the information sought--names vs. events,-- and the nature of the sources--news streams vs. image captions vs. scientific research papers, etc.
This volume aims to offer a broad and representative sample of studies from this very active research field.},
  address = {Berlin; New York},
  editor = {Poibeau, Thierry and Saggion, Horacio and Piskorski, Jakub and Yangarber, Roman},
  interhash = {b1d51398d5660ed1e16f40d74cc815db},
  intrahash = {21816f2809a2b58397acce5ac9558d28},
  isbn = {9783642285691 3642285694 3642285686 9783642285684},
  publisher = {Springer},
  refid = {808368416},
  title = {Multi-source, multilingual information extraction and summarization},
  url = {http://link.springer.com/book/10.1007/978-3-642-28569-1},
  year = 2013
}

@article{PhysRevE.64.016131,
  author = {Newman, M. E. J.},
  doi = {10.1103/PhysRevE.64.016131},
  interhash = {c2e3ef110ba67dd66249c354725aa680},
  intrahash = {c4ec4bf95bf426882af0061bee863511},
  journal = {Phys. Rev. E},
  month = jun,
  number = 1,
  numpages = {8},
  pages = 016131,
  publisher = {American Physical Society},
  title = {Scientific collaboration networks. I. Network construction and fundamental results},
  url = {http://link.aps.org/doi/10.1103/PhysRevE.64.016131},
  volume = 64,
  year = 2001
}

@article{Teufel02summarizingscientific,
  abstract = {this paper we argue that scientific articles require a different summarization strategy than, for instance, news articles. We propose a strategy which concentrates on the rhetorical status of statements in the article: Material for summaries is selected in such a way that summaries can highlight the new contribution of the source paper and situate it with respect to earlier work. We provide a gold standard for summaries of this kind consisting of a substantial corpus of conference articles in computational linguistics with human judgements of rhetorical status and relevance. We present several experiments measuring our judges' agreement on these annotations. We also present an algorithm which, on the basis of the annotated training material, selects content and classifies it into a fixed set of seven rhetorical categories.
The output of this extraction and classification system can be viewed as a single-document summary in its own right; alternatively, it can be used to generate task-oriented and user-tailored summaries designed to give users an overview of a scientific field.},
  author = {Teufel, Simone and Moens, Marc},
  interhash = {5062ef01775fa6300141a99937d0f1cd},
  intrahash = {7b5e363f72b4351d3afba8f2b369bed6},
  journal = {Computational Linguistics},
  number = 4,
  pages = {409--445},
  title = {Summarizing Scientific Articles - Experiments with Relevance and Rhetorical Status},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.27.5593},
  volume = 28,
  year = 2002
}

@inproceedings{Kando99textstructure,
  abstract = {This paper describes an information retrieval system with the function to support user's use of the retrieved documents using the text-level structure of documents. The text-level structure of each document is described by the occurrence of typical functional components in the text. Automatic detection of the components has been attempted in previous works using surface-level language processing. The proposed system firstly utilizes the text structure to conduct high-precision searches of documents or passages by distinguishing the role or function each concept plays in the text. It also allows browsing or skimming of retrieved texts, creating summaries on-the-fly with various levels of condensation specified by the user. Moreover, the system can search and display any unit of a text such as a sentence, a paragraph or a chapter.
Comparison of relevant passages in retrieved documents across multiple texts is helpful for users to examine, analyze, compare and integrate texts and...},
  author = {Kando, Noriko},
  booktitle = {Proceedings of the 4th International Workshop on Information Retrieval with Asian Languages},
  interhash = {7cd913f37b21579ee636dc5036cca292},
  intrahash = {94d62d9f3ef51731ea1edc03662616d9},
  pages = {126--135},
  title = {Text Structure Analysis as a Tool to Make Retrieved Documents Usable},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.34.3165},
  year = 1999
}

@inproceedings{Carlson10,
  author = {Carlson, A. and Betteridge, J. and Kisiel, B. and Settles, B. and Hruschka, Jr., E. R. and Mitchell, T. M.},
  booktitle = {Proceedings of the Conference on Artificial Intelligence (AAAI)},
  interhash = {5df31649862b1002848792cd495d46dc},
  intrahash = {f0d94ab9d299609ee92f6ecf555266d4},
  pages = {1306--1313},
  publisher = {AAAI Press},
  title = {Toward an Architecture for Never-Ending Language Learning},
  year = 2010
}

@article{lohrmarch112013computer,
  author = {Lohr, Steve},
  interhash = {3ab18521dd9e04d43a64c93174104b93},
  intrahash = {8d09e20df09a7df761eb23ca223d3391},
  journal = {The New York Times},
  month = mar,
  note = {Published March 11, 2013},
  title = {{NELL} Is a Computer That Reads the Web - With a Little Human Help},
  year = 2013
}

@book{staab2009handbook,
  abstract = {An ontology is a formal description of concepts and relationships that can exist for a community of human and/or machine agents.
This book considers ontology languages, ontology engineering methods, example ontologies, infrastructures and technologies for ontologies, and how to bring this all into ontology-based infrastructures and applications.},
  address = {Berlin},
  author = {Staab, Steffen and Studer, Rudi},
  interhash = {c2e7c401bef2cee2bb8b12334d3c7a88},
  intrahash = {be122d99dc6dd20cb58a55d62d8eca6c},
  isbn = {9783540926733 3540926739},
  publisher = {Springer},
  refid = {569892085},
  title = {Handbook on ontologies},
  url = {http://public.eblib.com/choice/publicfullrecord.aspx?p=571805},
  year = 2009
}

@incollection{piskorski2013information,
  abstract = {In this chapter we present a brief overview of Information Extraction, which is an area of natural language processing that deals with finding factual information in free text. In formal terms, },
  author = {Piskorski, Jakub and Yangarber, Roman},
  booktitle = {Multi-source, Multilingual Information Extraction and Summarization},
  doi = {10.1007/978-3-642-28569-1_2},
  editor = {Poibeau, Thierry and Saggion, Horacio and Piskorski, Jakub and Yangarber, Roman},
  interhash = {276145faeb3b45461f09f6ae5aabef5e},
  intrahash = {55c1de993e15515d35b68a512088d607},
  isbn = {978-3-642-28568-4},
  language = {English},
  pages = {23--49},
  publisher = {Springer Berlin Heidelberg},
  series = {Theory and Applications of Natural Language Processing},
  title = {Information Extraction: Past, Present and Future},
  url = {http://dx.doi.org/10.1007/978-3-642-28569-1_2},
  year = 2013
}

@book{manning2008,
  author = {Manning, Christopher D.
and Raghavan, Prabhakar and Schütze, Hinrich},
  interhash = {2e574e46b7668a7268e7f02b46f4d9bb},
  intrahash = {9f4ab13e07b48b9723113aa74224be65},
  publisher = {Cambridge University Press},
  title = {Introduction to Information Retrieval},
  year = 2008
}

@article{strohmaier2014computational,
  author = {Strohmaier, Markus and Wagner, Claudia},
  interhash = {af7197ba15ff513cefe2ae9410e1b050},
  intrahash = {6a4109ab23c2a3789d30d44afbfb1ebf},
  journal = {IEEE Intelligent Systems},
  pages = {84--88},
  publisher = {IEEE},
  title = {Computational Social Science for the World Wide Web},
  year = 2014
}

@article{cimiano05learning,
  author = {Cimiano, Philipp and Hotho, Andreas and Staab, Steffen},
  ee = {http://www.jair.org/papers/paper1648.html},
  interhash = {4c09568cff62babd362aab03095f4589},
  intrahash = {eaaf0e4b3a8b29fab23b6c15ce2d308d},
  journal = {Journal of Artificial Intelligence Research},
  pages = {305--339},
  title = {Learning Concept Hierarchies from Text Corpora using Formal Concept Analysis},
  url = {http://dblp.uni-trier.de/db/journals/jair/jair24.html#CimianoHS05},
  volume = 24,
  year = 2005
}

@article{ASI:ASI21001,
  abstract = {Authorship attribution supported by statistical or computational methods has a long history starting from the 19th century and is marked by the seminal study of Mosteller and Wallace (1964) on the authorship of the disputed “Federalist Papers.” During the last decade, this scientific field has been developed substantially, taking advantage of research advances in areas such as machine learning, information retrieval, and natural language processing. The plethora of available electronic texts (e.g., e-mail messages, online forum messages, blogs, source code, etc.) indicates a wide variety of applications of this technology, provided it is able to handle short and noisy text from multiple candidate authors.
In this article, a survey of recent advances of the automated approaches to attributing authorship is presented, examining their characteristics for both text representation and text classification. The focus of this survey is on computational requirements and settings rather than on linguistic or literary issues. We also discuss evaluation methodologies and criteria for authorship attribution studies and list open questions that will attract future work in this area.},
  author = {Stamatatos, Efstathios},
  doi = {10.1002/asi.21001},
  interhash = {9b36b85e07a64fd5295da2a2ad9a9efb},
  intrahash = {3633fceae3a425c891514f341707a1dc},
  issn = {1532-2890},
  journal = {Journal of the American Society for Information Science and Technology},
  number = 3,
  pages = {538--556},
  publisher = {Wiley Subscription Services, Inc., A Wiley Company},
  title = {A survey of modern authorship attribution methods},
  url = {http://dx.doi.org/10.1002/asi.21001},
  volume = 60,
  year = 2009
}

@article{vorontsovtutorial,
  author = {Vorontsov, Konstantin and Potapenko, Anna},
  interhash = {b3302a48be9b79342711884605ee3503},
  intrahash = {12f451e98ef51ea1060565ab96e19e3c},
  title = {Tutorial on Probabilistic Topic Modeling: Additive Regularization for Stochastic Matrix Factorization},
  year = 2014
}

@article{6542727,
  abstract = {In this paper, we propose a novel hierarchical generative model, named author-genre-topic model (AGTM), to perform satellite image annotation. Different from the existing author-topic model in which each author and topic are associated with the multinomial distributions over topics and words, in AGTM, each genre, author, and topic are associated with the multinomial distributions over authors, topics, and words, respectively. The bias of the distribution of the authors with respect to the topics can be rectified by incorporating the distribution of the genres with respect to the authors.
Therefore, the classification accuracy of documents is improved when the information of genre is introduced. By representing the images with several visual words, the AGTM can be used for satellite image annotation. The labels of classes and scenes of the images correspond to the authors and the genres of the documents, respectively. The labels of classes and scenes of test images can be estimated, and the accuracy of satellite image annotation is improved when the information of scenes is introduced in the training images. Experimental results demonstrate the good performance of the proposed method.},
  author = {Luo, Wang and Li, Hongliang and Liu, Guanghui and Zeng, Liaoyuan},
  doi = {10.1109/TGRS.2013.2250978},
  interhash = {4152c5c479a7eae90a4ee1f63dc89610},
  intrahash = {a68906eb86024782ace5fe7a33d16522},
  issn = {0196-2892},
  journal = {IEEE Transactions on Geoscience and Remote Sensing},
  month = feb,
  number = 2,
  pages = {1356--1368},
  title = {Semantic Annotation of Satellite Images Using Author - Genre - Topic Model},
  url = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=6542727&abstractAccess=no&userType=inst},
  volume = 52,
  year = 2014
}

@inproceedings{kataria2011context,
  abstract = {In a document network such as a citation network of scientific documents, web-logs etc., the content produced by authors exhibit their interest in certain topics. In addition some authors influence other authors' interests. In this work, we propose to model the influence of cited authors along with the interests of citing authors. Moreover, we hypothesize that citations present in documents, the context surrounding the citation mention provides extra topical information about the cited authors. However, associating terms in the context to the cited authors remains an open problem. We propose novel document generation schemes that incorporate the context while simultaneously modeling the interests of citing authors and influence of the cited authors. Our experiments show significant improvements over baseline models for various evaluation criteria such as link prediction between document and cited author, and quantitatively explaining unseen text.},
  author = {Kataria, Saurabh and Mitra, Prasenjit and Caragea, Cornelia and Giles, C.},
  booktitle = {International Joint Conference on Artificial Intelligence},
  interhash = {7496b4df1335fbc6aea691cecb65289d},
  intrahash = {dc774d17ec721be6d32530d265f34539},
  title = {Context Sensitive Topic Models for Author Influence in Document Networks},
  url = {https://www.aaai.org/ocs/index.php/IJCAI/IJCAI11/paper/view/3140},
  year = 2011
}

@proceedings{jannach2014proceedings,
  bibsource = {dblp computer science bibliography, http://dblp.org},
  editor = {Jannach, Dietmar and Freyne, Jill and Geyer, Werner and Guy, Ido and Hotho, Andreas and Mobasher, Bamshad},
  interhash = {a1a704ec9c98e6031a1444c6eccc7c0a},
  intrahash = {09cb7c63e60bd3c5e6773c9c871a8aba},
  publisher = {CEUR-WS.org},
  series = {{CEUR} Workshop Proceedings},
  title = {Proceedings of the 6th Workshop on Recommender Systems and the Social Web (RSWeb 2014) co-located with the 8th {ACM} Conference on Recommender Systems (RecSys 2014), Foster City, CA, USA, October 6, 2014},
  url = {http://ceur-ws.org/Vol-1271},
  volume = 1271,
  year = 2014
}

@proceedings{cellier2014proceedings,
  bibsource = {dblp computer science bibliography, http://dblp.org},
  editor = {Cellier, Peggy and Charnois, Thierry and Hotho, Andreas and Matwin, Stan and Moens, Marie{-}Francine and Toussaint, Yannick},
  interhash = {212d282598a034c37510c1c08c4f3a34},
  intrahash = {cfb7265080d484cfda32e1fbdaff361f},
  publisher = {CEUR-WS.org},
  series = {{CEUR} Workshop Proceedings},
  title = {Proceedings of the 1st International Workshop on Interactions between Data Mining and Natural Language Processing co-located with The European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, DMNLP@PKDD/ECML 2014, Nancy, France, September 15,
2014},
  url = {http://ceur-ws.org/Vol-1202},
  volume = 1202,
  year = 2014
}

@inproceedings{jannach2014sixth,
  author = {Jannach, Dietmar and Freyne, Jill and Geyer, Werner and Guy, Ido and Hotho, Andreas and Mobasher, Bamshad},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  booktitle = {Eighth {ACM} Conference on Recommender Systems, RecSys '14, Foster City, Silicon Valley, CA, {USA} - October 06 - 10, 2014},
  doi = {10.1145/2645710.2645786},
  interhash = {b465a3695da123d6ee9de1675cb3d480},
  intrahash = {5773f799bec72240eda5e6cfb6a03d7b},
  pages = 395,
  title = {The sixth {ACM} RecSys workshop on recommender systems and the social web},
  url = {http://doi.acm.org/10.1145/2645710.2645786},
  year = 2014
}

@inproceedings{DBLP:conf/recsys/JannachFGGHM14,
  author = {Jannach, Dietmar and Freyne, Jill and Geyer, Werner and Guy, Ido and Hotho, Andreas and Mobasher, Bamshad},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  booktitle = {Eighth {ACM} Conference on Recommender Systems, RecSys '14, Foster City, Silicon Valley, CA, {USA} - October 06 - 10, 2014},
  crossref = {DBLP:conf/recsys/2014},
  doi = {10.1145/2645710.2645786},
  editor = {Kobsa, Alfred and Zhou, Michelle X. and Ester, Martin and Koren, Yehuda},
  interhash = {b465a3695da123d6ee9de1675cb3d480},
  intrahash = {22982f128f7f6d009dbf9bd8ed1f3705},
  isbn = {978-1-4503-2668-1},
  pages = 395,
  publisher = {{ACM}},
  title = {The sixth {ACM} RecSys workshop on recommender systems and the social web},
  url = {http://doi.acm.org/10.1145/2645710.2645786},
  year = 2014
}

@proceedings{DBLP:conf/recsys/2014,
  bibsource = {dblp computer science bibliography, http://dblp.org},
  editor = {Kobsa, Alfred and Zhou, Michelle X.
and Ester, Martin and Koren, Yehuda},
  interhash = {b13ef6de1f0f23de61196c1b69632341},
  intrahash = {3bbaac1ed2eebe59ee1c9814005d8288},
  isbn = {978-1-4503-2668-1},
  publisher = {{ACM}},
  title = {Eighth {ACM} Conference on Recommender Systems, RecSys '14, Foster City, Silicon Valley, CA, {USA} - October 06 - 10, 2014},
  url = {http://dl.acm.org/citation.cfm?id=2645710},
  year = 2014
}

@inproceedings{doerfel2014social,
  address = {New York, NY, USA},
  author = {Doerfel, Stephan and Zoller, Daniel and Singer, Philipp and Niebler, Thomas and Hotho, Andreas and Strohmaier, Markus},
  booktitle = {Proceedings of the 23rd International World Wide Web Conference},
  interhash = {9223d6d728612c8c05a80b5edceeb78b},
  intrahash = {11fab5468dd4b4e3db662ea5e68df8e0},
  publisher = {ACM},
  series = {WWW 2014},
  title = {How Social is Social Tagging?},
  year = 2014
}

@inproceedings{doerfel2014evaluating,
  author = {Doerfel, Stephan and Zoller, Daniel and Singer, Philipp and Niebler, Thomas and Hotho, Andreas and Strohmaier, Markus},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  booktitle = {Proceedings of the 16th {LWA} Workshops: KDML, {IR} and FGWM, Aachen, Germany, September 8-10, 2014.},
  editor = {Seidl, Thomas and Hassani, Marwan and Beecks, Christian},
  interhash = {955cd7c6f7652b7c531b699464925b1f},
  intrahash = {4b2e73c82b5a84e1959ad66aaad4a235},
  pages = {18--19},
  publisher = {CEUR-WS.org},
  title = {Evaluating Assumptions about Social Tagging - {A} Study of User Behavior in BibSonomy},
  url = {http://ceur-ws.org/Vol-1226/paper06.pdf},
  year = 2014
}

@article{atzmueller2014ubicon,
  abstract = {The combination of ubiquitous and social computing is an emerging research area which integrates different but complementary methods, techniques and tools. In this paper, we focus on the Ubicon platform, its applications, and a large spectrum of analysis results.
Ubicon provides an extensible framework for building and hosting applications targeting both ubiquitous and social environments. We summarize the architecture and exemplify its implementation using four real-world applications built on top of Ubicon. In addition, we discuss several scientific experiments in the context of these applications in order to give a better picture of the potential of the framework, and discuss analysis results using several real-world data sets collected utilizing Ubicon.},
  author = {Atzmueller, Martin and Becker, Martin and Kibanov, Mark and Scholz, Christoph and Doerfel, Stephan and Hotho, Andreas and Macek, Bjoern-Elmar and Mitzlaff, Folke and Mueller, Juergen and Stumme, Gerd},
  doi = {10.1080/13614568.2013.873488},
  interhash = {6364e034fa868644b30618dc887c0270},
  intrahash = {176e4f2816af5fe1630ed65e062900ce},
  journal = {New Review of Hypermedia and Multimedia},
  number = 1,
  pages = {53--77},
  title = {{Ubicon} and its Applications for Ubiquitous Social Computing},
  url = {http://www.tandfonline.com/doi/abs/10.1080/13614568.2013.873488},
  volume = 20,
  year = 2014
}

@article{thurau2012descriptive,
  abstract = {Climate change, the global energy footprint, and strategies for sustainable development have become topics of considerable political and public interest. The public debate is informed by an exponentially growing amount of data and there are diverse partisan interest when it comes to interpretation. We therefore believe that data analysis methods are called for that provide results which are intuitively understandable even to non-experts. Moreover, such methods should be efficient so that non-experts users can perform their own analysis at low expense in order to understand the effects of different parameters and influential factors. In this paper, we discuss a new technique for factorizing data matrices that meets both these requirements.
The basic idea is to represent a set of data by means of convex combinations of extreme data points. This often accommodates human cognition. In contrast to established factorization methods, the approach presented in this paper can also determine over-complete bases. At the same time, convex combinations allow for highly efficient matrix factorization. Based on techniques adopted from the field of distance geometry, we derive a linear time algorithm to determine suitable basis vectors for factorization. By means of the example of several environmental and developmental data sets we discuss the performance and characteristics of the proposed approach and validate that significant efficiency gains are obtainable without performance decreases compared to existing convexity constrained approaches.},
  affiliation = {Fraunhofer Institute for Intelligent Analysis and Information Systems IAIS, Sankt Augustin, Germany},
  author = {Thurau, Christian and Kersting, Kristian and Wahabzada, Mirwaes and Bauckhage, Christian},
  doi = {10.1007/s10618-011-0216-z},
  interhash = {457c57f054fea45dcbc8447263591d97},
  intrahash = {387f4e1711d7065bd5a94455aeae1957},
  issn = {1384-5810},
  journal = {Data Mining and Knowledge Discovery},
  keyword = {Computer Science},
  number = 2,
  pages = {325--354},
  publisher = {Springer Netherlands},
  title = {Descriptive matrix factorization for sustainability: Adopting the principle of opposites},
  url = {http://dx.doi.org/10.1007/s10618-011-0216-z},
  volume = 24,
  year = 2012
}

@inproceedings{conf/ai/ShafieiM08,
  author = {Shafiei, M.
Mahdi and Milios, Evangelos E.},
  booktitle = {Canadian Conference on AI},
  crossref = {conf/ai/2008},
  editor = {Bergler, Sabine},
  ee = {http://dx.doi.org/10.1007/978-3-540-68825-9_27},
  interhash = {1ed1fddf0ac4762ea8debac2ee80b936},
  intrahash = {80e27cd4ea288b0ab6bcc1c67841364e},
  isbn = {978-3-540-68821-1},
  pages = {283--295},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  title = {A Statistical Model for Topic Segmentation and Clustering},
  url = {http://dblp.uni-trier.de/db/conf/ai/ai2008.html#ShafieiM08},
  volume = 5032,
  year = 2008
}

@inproceedings{conf/dis/PontiTK11,
  author = {Ponti, Giovanni and Tagarelli, Andrea and Karypis, George},
  booktitle = {Discovery Science},
  crossref = {conf/dis/2011},
  editor = {Elomaa, Tapio and Hollmén, Jaakko and Mannila, Heikki},
  ee = {http://dx.doi.org/10.1007/978-3-642-24477-3_21},
  interhash = {1d2b8fd777a36c3c42c10dac886d5d25},
  intrahash = {af476c498b77848fa7c8121c8955a307},
  isbn = {978-3-642-24476-6},
  pages = {247--261},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  title = {A Statistical Model for Topically Segmented Documents},
  url = {http://dblp.uni-trier.de/db/conf/dis/dis2011.html#PontiTK11},
  volume = 6926,
  year = 2011
}

@article{journals/ml/DuBJ10,
  author = {Du, Lan and Buntine, Wray L. and Jin, Huidong},
  ee = {http://dx.doi.org/10.1007/s10994-010-5197-4},
  interhash = {f39304f04fa411cc2c9232aa7eb83b83},
  intrahash = {286291dfe97008c5bda330ffc0b72af1},
  journal = {Machine Learning},
  number = 1,
  pages = {5--19},
  title = {A segmented topic model based on the two-parameter {Poisson-Dirichlet} process},
  url = {http://dblp.uni-trier.de/db/journals/ml/ml81.html#DuBJ10},
  volume = 81,
  year = 2010
}

@article{singer2013computing,
  abstract = {In this article, the authors present a novel approach for computing semantic relatedness and conduct a large-scale study of it on Wikipedia. 
Unlike existing semantic analysis methods that utilize Wikipedia’s content or link structure, the authors propose to use human navigational paths on Wikipedia for this task. The authors obtain 1.8 million human navigational paths from a semi-controlled navigation experiment – a Wikipedia-based navigation game, in which users are required to find short paths between two articles in a given Wikipedia article network. The authors’ results are intriguing: They suggest that (i) semantic relatedness computed from human navigational paths may be more precise than semantic relatedness computed from Wikipedia’s plain link structure alone and (ii) that not all navigational paths are equally useful. Intelligent selection based on path characteristics can improve accuracy. The authors’ work makes an argument for expanding the existing arsenal of data sources for calculating semantic relatedness and to consider the utility of human navigational paths for this task.},
  author = {Singer, Philipp and Niebler, Thomas and Strohmaier, Markus and Hotho, Andreas},
  doi = {10.4018/ijswis.2013100103},
  interhash = {3377abe1838bd1f650b317ed1fca4dfe},
  intrahash = {5262c48a2e2791d28610712e3bf5cf55},
  issn = {1552-6283},
  journal = {International Journal on Semantic Web and Information Systems (IJSWIS)},
  number = 4,
  pages = {41--70},
  publisher = {IGI Global},
  refid = {102707},
  title = {Computing Semantic Relatedness from Human Navigational Paths: A Case Study on {Wikipedia}},
  url = {http://services.igi-global.com/resolvedoi/resolve.aspx?doi=10.4018/ijswis.2013100103},
  volume = 9,
  year = 2013
}

@article{mann1988rhetorical,
  author = {Mann, William C. and Thompson, Sandra A.},
  interhash = {e8feceeba43734d376da50554b8071d2},
  intrahash = {8cedc5c82592cf1bbcfe5fa685fc5c67},
  journal = {Text},
  number = 3,
  pages = {243--281},
  title = {Rhetorical structure theory: Toward a functional theory of text organization},
  url = 
{http://scholar.google.com/scholar.bib?q=info:BEw8CIWbucoJ:scholar.google.com/&output=citation&scisig=AAGBfm0AAAAAU3X_1Dq4ULnWfFzMeRsqGJcha1fReMSl&scisf=4&hl=en},
  volume = 8,
  year = 1988
}

@article{Salton1996127,
  author = {Salton, Gerard and Allan, James and Singhal, Amit},
  doi = {10.1016/S0306-4573(96)85001-1},
  interhash = {2fc2920e08a6a7dda2c256e62fc1e349},
  intrahash = {8674111d30a3c67d5d8a8b847cebb771},
  issn = {0306-4573},
  journal = {Information Processing \& Management},
  number = 2,
  pages = {127--138},
  title = {Automatic text decomposition and structuring},
  url = {http://www.sciencedirect.com/science/article/pii/S0306457396850011},
  volume = 32,
  year = 1996
}

@inproceedings{Ramage:2009:LLS:1699510.1699543,
  abstract = {A significant portion of the world's text is tagged by readers on social bookmarking websites. Credit attribution is an inherent problem in these corpora because most pages have multiple tags, but the tags do not always apply with equal specificity across the whole document. Solving the credit attribution problem requires associating each word in a document with the most appropriate tags and vice versa. This paper introduces Labeled LDA, a topic model that constrains Latent Dirichlet Allocation by defining a one-to-one correspondence between LDA's latent topics and user tags. This allows Labeled LDA to directly learn word-tag correspondences. We demonstrate Labeled LDA's improved expressiveness over traditional LDA with visualizations of a corpus of tagged web pages from del.icio.us. Labeled LDA outperforms SVMs by more than 3 to 1 when extracting tag-specific document snippets. 
As a multi-label text classifier, our model is competitive with a discriminative baseline on a variety of datasets.},
  acmid = {1699543},
  address = {Stroudsburg, PA, USA},
  author = {Ramage, Daniel and Hall, David and Nallapati, Ramesh and Manning, Christopher D.},
  booktitle = {Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing: Volume 1},
  interhash = {45315f4da7b10debdca560506cf0d7ba},
  intrahash = {6e7173f084e26bca9a8d2a1ab4a5b709},
  isbn = {978-1-932432-59-6},
  location = {Singapore},
  numpages = {9},
  pages = {248--256},
  publisher = {Association for Computational Linguistics},
  series = {EMNLP '09},
  title = {Labeled {LDA}: A Supervised Topic Model for Credit Attribution in Multi-labeled Corpora},
  url = {http://dl.acm.org/citation.cfm?id=1699510.1699543},
  year = 2009
}

@inproceedings{conf/pkdd/BalasubramanyanDC13,
  author = {Balasubramanyan, Ramnath and Dalvi, Bhavana Bharat and Cohen, William W.},
  booktitle = {ECML/PKDD (2)},
  crossref = {conf/pkdd/2013-2},
  editor = {Blockeel, Hendrik and Kersting, Kristian and Nijssen, Siegfried and Zelezný, Filip},
  ee = {http://dx.doi.org/10.1007/978-3-642-40991-2_40},
  interhash = {9a32b7cc059a500ea302d0aa65036682},
  intrahash = {e56623d21a1b7bcb442cd15fe098bb70},
  isbn = {978-3-642-40990-5},
  pages = {628--642},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  title = {From Topic Models to Semi-supervised Learning: Biasing Mixed-Membership Models to Exploit Topic-Indicative Features in Entity Clustering},
  url = {http://dblp.uni-trier.de/db/conf/pkdd/pkdd2013-2.html#BalasubramanyanDC13},
  volume = 8189,
  year = 2013
}

@article{BarIlan20081,
  author = {Bar-Ilan, Judit},
  doi = {10.1016/j.joi.2007.11.001},
  interhash = {2399c2cf8ca4ef601d59607bce3df41f},
  intrahash = {548c5547f6cdf297fa602fe450ef06a8},
  issn = {1751-1577},
  journal = {Journal of Informetrics},
  number = 1,
  pages = {1--52},
  title = {Informetrics at the beginning of the 21st century—A review},
  url = 
{http://www.sciencedirect.com/science/article/pii/S1751157707000740},
  volume = 2,
  year = 2008
}

@article{Alonso2009273,
  author = {Alonso, S. and Cabrerizo, F. J. and Herrera-Viedma, E. and Herrera, F.},
  doi = {10.1016/j.joi.2009.04.001},
  interhash = {cbf95718465346edecef397149e4cf51},
  intrahash = {859c208f329fa96e26e35f1bcb7ab65d},
  issn = {1751-1577},
  journal = {Journal of Informetrics},
  number = 4,
  pages = {273--289},
  title = {h-Index: A review focused in its variants, computation and standardization for different scientific fields},
  url = {http://www.sciencedirect.com/science/article/pii/S1751157709000339},
  volume = 3,
  year = 2009
}

@inproceedings{Sautter:2012:IBR:2403832.2403883,
  abstract = {Parsing details like author names and titles out of bibliographic references of scientific publications is an important issue. However, most existing techniques are tailored to the highly standardized reference styles used in the last two to three decades. Their performance tends to degrade when faced with the wider variety of reference styles used in older, historic publications. Thus, existing techniques are of limited use when creating comprehensive bibliographies covering both historic and contemporary scientific publications. This paper presents RefParse, a generic approach to bibliographic reference parsing that is independent of any specific reference style. Its core feature is an inference mechanism that exploits the regularities inherent in any list of references to deduce its format. 
Our evaluation shows that RefParse outperforms existing parsers both for contemporary and for historic reference lists.},
  acmid = {2403883},
  address = {Berlin, Heidelberg},
  author = {Sautter, Guido and B{\"o}hm, Klemens},
  booktitle = {Proceedings of the Second International Conference on Theory and Practice of Digital Libraries},
  doi = {10.1007/978-3-642-33290-6_40},
  interhash = {20fe241af3945dca2e242ae72eae05ad},
  intrahash = {ce9a27e85a0cc6bef109d5130e7ed1ea},
  isbn = {978-3-642-33289-0},
  location = {Paphos, Cyprus},
  numpages = {13},
  pages = {370--382},
  publisher = {Springer-Verlag},
  series = {TPDL'12},
  title = {Improved Bibliographic Reference Parsing Based on Repeated Patterns},
  url = {http://dx.doi.org/10.1007/978-3-642-33290-6_40},
  year = 2012
}

% DBLP homonym numbers (0001, 0002) are not part of the author names.
@inproceedings{conf/kdd/HongYGD11,
  author = {Hong, Liangjie and Yin, Dawei and Guo, Jian and Davison, Brian D.},
  booktitle = {KDD},
  crossref = {conf/kdd/2011},
  editor = {Apté, Chid and Ghosh, Joydeep and Smyth, Padhraic},
  ee = {http://doi.acm.org/10.1145/2020408.2020485},
  interhash = {35519287a72896f1adee0aaf14430dd8},
  intrahash = {a636ba59e9c57611c070e30086b27592},
  isbn = {978-1-4503-0813-7},
  pages = {484--492},
  publisher = {ACM},
  title = {Tracking trends: incorporating term volume into temporal topic models},
  url = {http://dblp.uni-trier.de/db/conf/kdd/kdd2011.html#HongYGD11},
  year = 2011
}

@incollection{bleigjt03,
  address = {Cambridge, MA},
  author = {Blei, D. M. and Griffiths, T. L. and Jordan, M. I. and Tenenbaum, J. B.},
  booktitle = {Advances in {Neural Information Processing Systems} 16},
  interhash = {f185b4657e25c733ee613bece516b3c5},
  intrahash = {3e438204424fa2c6e8915bd8f0baf112},
  publisher = {MIT Press},
  title = {Hierarchical topic models and the nested {Chinese} restaurant process},
  year = 2004
}

@misc{kang2013lalda,
  abstract = {Social media users have finite attention which limits the number of incoming messages from friends they can process. 
Moreover, they pay more attention to opinions and recommendations of some friends more than others. In this paper, we propose LA-LDA, a latent topic model which incorporates limited, non-uniformly divided attention in the diffusion process by which opinions and information spread on the social network. We show that our proposed model is able to learn more accurate user models from users' social network and item adoption behavior than models which do not take limited attention into account. We analyze voting on news items on the social news aggregator Digg and show that our proposed model is better able to predict held out votes than alternative models. Our study demonstrates that psycho-socially motivated models have better ability to describe and predict observed behavior than models which only consider topics.},
  archiveprefix = {arXiv},
  author = {Kang, Jeon-Hyung and Lerman, Kristina and Getoor, Lise},
  eprint = {1301.6277},
  interhash = {18a900ae003a2aedb3879fcaaa4e89b6},
  intrahash = {84ae222ddb615ca8ae9421a29c07a8f6},
  note = {The 2013 International Conference on Social Computing, Behavioral-Cultural Modeling, \& Prediction (SBP 2013)},
  title = {{LA-LDA}: A Limited Attention Topic Model for Social Recommendation},
  url = {http://arxiv.org/abs/1301.6277},
  year = 2013
}

@misc{goldenberg2009survey,
  abstract = {Networks are ubiquitous in science and have become a focal point for discussion in everyday life. Formal statistical models for the analysis of network data have emerged as a major topic of interest in diverse areas of study, and most of these involve a form of graphical representation. Probability models on graphs date back to 1959. Along with empirical studies in social psychology and sociology from the 1960s, these early works generated an active network community and a substantial literature in the 1970s. 
This effort moved into the statistical literature in the late 1970s and 1980s, and the past decade has seen a burgeoning network literature in statistical physics and computer science. The growth of the World Wide Web and the emergence of online networking communities such as Facebook, MySpace, and LinkedIn, and a host of more specialized professional network communities has intensified interest in the study of networks and network data. Our goal in this review is to provide the reader with an entry point to this burgeoning literature. We begin with an overview of the historical development of statistical network modeling and then we introduce a number of examples that have been studied in the network literature. Our subsequent discussion focuses on a number of prominent static and dynamic network models and their interconnections. We emphasize formal model descriptions, and pay special attention to the interpretation of parameters and their estimation. We end with a description of some open problems and challenges for machine learning and statistics.},
  archiveprefix = {arXiv},
  author = {Goldenberg, Anna and Zheng, Alice X and Fienberg, Stephen E and Airoldi, Edoardo M},
  eprint = {0912.5410},
  interhash = {bab22de06306d84cf357aadf48982d87},
  intrahash = {5e341981218d7cd89416c3371d56c794},
  note = {96 pages, 14 figures, 333 references},
  title = {A survey of statistical network models},
  url = {http://arxiv.org/abs/0912.5410},
  year = 2009
}

% Editor surname is "van Dyk"; the comma form keeps the particle with the last name.
@inproceedings{journals/jmlr/ChangB09,
  author = {Chang, Jonathan and Blei, David M.},
  booktitle = {AISTATS},
  crossref = {conf/aistats/2009},
  editor = {van Dyk, David A. and Welling, Max},
  ee = {http://www.jmlr.org/proceedings/papers/v5/chang09a.html},
  interhash = {f3431fd69b315a22422a2c0f15ee0b71},
  intrahash = {86f665b74ecabb56e81542e0f052a331},
  pages = {81--88},
  publisher = {JMLR.org},
  series = {JMLR Proceedings},
  title = {Relational Topic Models for Document Networks},
  url = {http://dblp.uni-trier.de/db/journals/jmlr/jmlrp5.html#ChangB09},
  volume = 5,
  year = 2009
}

@misc{doerfel2014course,
  abstract = {Social tagging systems have established themselves as an important part in today's web and have attracted the interest from our research community in a variety of investigations. The overall vision of our community is that simply through interactions with the system, i.e., through tagging and sharing of resources, users would contribute to building useful semantic structures as well as resource indexes using uncontrolled vocabulary not only due to the easy-to-use mechanics. Henceforth, a variety of assumptions about social tagging systems have emerged, yet testing them has been difficult due to the absence of suitable data. In this work we thoroughly investigate three available assumptions - e.g., is a tagging system really social? - by examining live log data gathered from the real-world public social tagging system BibSonomy. Our empirical results indicate that while some of these assumptions hold to a certain extent, other assumptions need to be reflected and viewed in a very critical light. Our observations have implications for the design of future search and other algorithms to better reflect the actual user behavior.},
  archiveprefix = {arXiv},
  author = {Doerfel, Stephan and Zoller, Daniel and Singer, Philipp and Niebler, Thomas and Hotho, Andreas and Strohmaier, Markus},
  eprint = {1401.0629},
  interhash = {65f287480af20fc407f7d26677f17b72},
  intrahash = {988ea3a9b85ec0656e27750e4080325c},
  title = {Of course we share! 
Testing Assumptions about Social Tagging Systems},
  url = {http://arxiv.org/abs/1401.0629},
  year = 2014
}

@article{Jain:1999:DCR:331499.331504,
  abstract = {Clustering is the unsupervised classification of patterns (observations, data items, or feature vectors) into groups (clusters). The clustering problem has been addressed in many contexts and by researchers in many disciplines; this reflects its broad appeal and usefulness as one of the steps in exploratory data analysis. However, clustering is a difficult problem combinatorially, and differences in assumptions and contexts in different communities has made the transfer of useful generic concepts and methodologies slow to occur. This paper presents an overview of pattern clustering methods from a statistical pattern recognition perspective, with a goal of providing useful advice and references to fundamental concepts accessible to the broad community of clustering practitioners. We present a taxonomy of clustering techniques, and identify cross-cutting themes and recent advances. We also describe some important applications of clustering algorithms such as image segmentation, object recognition, and information retrieval.},
  acmid = {331504},
  address = {New York, NY, USA},
  author = {Jain, A. K. and Murty, M. N. and Flynn, P. J.},
  doi = {10.1145/331499.331504},
  hans = {otto},
  interhash = {5113b61d428d4de4423182e5f2b2f468},
  intrahash = {bd7234f7139a1651acfaed57b5c2551f},
  issn = {0360-0300},
  issue_date = {Sept. 1999},
  journal = {ACM Comput. Surv.},
  month = sep,
  number = 3,
  numpages = {60},
  pages = {264--323},
  publisher = {ACM},
  title = {Data Clustering: A Review},
  url = {http://doi.acm.org/10.1145/331499.331504},
  volume = 31,
  year = 1999
}

@article{Hoonlor:2013:TCS:2507771.2500892,
  abstract = {Keywords in the ACM Digital Library and IEEE Xplore digital library and in NSF grants anticipate future CS research.},
  acmid = {2500892},
  address = {New York, NY, USA},
  author = {Hoonlor, Apirak and Szymanski, Boleslaw K. 
and Zaki, Mohammed J.},
  doi = {10.1145/2500892},
  interhash = {425133ebceab2bce5f418ffd9917df55},
  intrahash = {4a2aee492bfcfcdbbcc7774bdcddd4a2},
  issn = {0001-0782},
  issue_date = {October 2013},
  journal = {Commun. ACM},
  month = oct,
  number = 10,
  numpages = {10},
  pages = {74--83},
  publisher = {ACM},
  title = {Trends in Computer Science Research},
  url = {http://doi.acm.org/10.1145/2500892},
  volume = 56,
  year = 2013
}

@proceedings{conf/recsys/2013rsweb,
  booktitle = {RSWeb@RecSys},
  editor = {Mobasher, Bamshad and Jannach, Dietmar and Geyer, Werner and Freyne, Jill and Hotho, Andreas and Anand, Sarabjot Singh and Guy, Ido},
  ee = {http://ceur-ws.org/Vol-1066},
  interhash = {31e724c09d1f4a4bbf013ecb8e1f6685},
  intrahash = {aca768068f09003e97b51d48ec092ddc},
  publisher = {CEUR-WS.org},
  series = {CEUR Workshop Proceedings},
  title = {Proceedings of the Fifth ACM RecSys Workshop on Recommender Systems and the Social Web co-located with the 7th ACM Conference on Recommender Systems (RecSys 2013), Hong Kong, China, October 13, 2013.},
  url = {http://ceur-ws.org/Vol-1066},
  volume = 1066,
  year = 2013
}

@book{atzmueller2013ubiquitous,
  address = {Berlin, Heidelberg},
  editor = {Atzmueller, Martin and Chin, Alvin and Helic, Denis and Hotho, Andreas},
  interhash = {b0fcec93b875c8b0060087bc07944e89},
  intrahash = {1e2d036351662d35ef95719554d37e46},
  isbn = {9783642453915 3642453910 9783642453922 3642453929},
  publisher = {Springer},
  refid = {867052137},
  title = {Ubiquitous Social Media Analysis: Third International Workshops, MUSE 2012, Bristol, UK, September 24, 2012, and MSM 2012, Milwaukee, WI, USA, June 25, 2012, Revised Selected Papers},
  url = {http://link.springer.com/book/10.1007/978-3-642-45392-2},
  year = 2013
}

@inproceedings{MASH:13b,
  address = {Bamberg, Germany},
  author = {Mitzlaff, Folke and Atzmueller, Martin and Stumme, Gerd and Hotho, Andreas},
  booktitle = {Proc. 
LWA 2013 (KDML Special Track)},
  interhash = {73088600a500f7d06768615d6e1c2b3d},
  intrahash = {820ffb2166b330bf60bb30b16e426553},
  publisher = {University of Bamberg},
  title = {On the Semantics of User Interaction in Social Media (Extended Abstract, Resubmission)},
  year = 2013
}

@article{journals/corr/MitzlaffABHS13,
  author = {Mitzlaff, Folke and Atzmueller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  ee = {http://arxiv.org/abs/1309.3888},
  interhash = {40aa075d925f2e6e009986fd9e60b11b},
  intrahash = {6f8017b9b01047d88b8e092747e25c4b},
  journal = {CoRR},
  title = {User-Relatedness and Community Structure in Social Interaction Networks},
  url = {http://dblp.uni-trier.de/db/journals/corr/corr1309.html#MitzlaffABHS13},
  volume = {abs/1309.3888},
  year = 2013
}

@book{doerfel2013informationelle,
  author = {Doerfel, Stephan and Hotho, Andreas and Kartal-Aydemir, Aliye and Roßnagel, Alexander and Stumme, Gerd},
  interhash = {f72d297ba42797ca66baba052c846b7a},
  intrahash = {2bb934c0ff3652843fd0aff97d8d7324},
  isbn = {9783642380556 3642380557},
  publisher = {Vieweg + Teubner Verlag},
  refid = {857973438},
  title = {Informationelle Selbstbestimmung im Web 2.0: Chancen und Risiken sozialer Verschlagwortungssysteme},
  url = {http://www.worldcat.org/search?qt=worldcat_org_all&q=9783642380556},
  year = 2013
}

@inproceedings{mueller2013recommendations,
  abstract = {With the rising popularity of smart mobile devices, sensor data-based applications have become more and more popular. Their users record data during their daily routine or specifically for certain events. The application WideNoise Plus allows users to record sound samples and to annotate them with perceptions and tags. The app is being used to document and map the soundscape all over the world. The procedure of recording, including the assignment of tags, has to be as easy-to-use as possible. We therefore discuss the application of tag recommender algorithms in this particular scenario. 
We show, that this task is fundamentally different from the well-known tag recommendation problem in folksonomies as users do no longer tag fix resources but rather sensory data and impressions. The scenario requires efficient recommender algorithms that are able to run on the mobile device, since Internet connectivity cannot be assumed to be available. Therefore, we evaluate the performance of several tag recommendation algorithms and discuss their applicability in the mobile sensing use-case.},
  address = {Aachen, Germany},
  author = {Mueller, Juergen and Doerfel, Stephan and Becker, Martin and Hotho, Andreas and Stumme, Gerd},
  booktitle = {Recommender Systems and the Social Web Workshop at 7th ACM Conference on Recommender Systems, RecSys 2013, Hong Kong, China -- October 12-16, 2013. Proceedings},
  interhash = {23d1cf49208d9a0c8b883dc69d4e444d},
  intrahash = {2bab3f013052bc741e795c5c61aea5c9},
  issn = {1613-0073},
  publisher = {CEUR-WS},
  title = {Tag Recommendations for SensorFolkSonomies},
  url = {http://ceur-ws.org/Vol-1066/},
  volume = 1066,
  year = 2013
}

@article{10.1371/journal.pone.0081638,
  abstract = {

The development of ICT infrastructures has facilitated the emergence of new paradigms for looking at society and the environment over the last few years. Participatory environmental sensing, i.e. directly involving citizens in environmental monitoring, is one example, which is hoped to encourage learning and enhance awareness of environmental issues. In this paper, an analysis of the behaviour of individuals involved in noise sensing is presented. Citizens have been involved in noise measuring activities through the WideNoise smartphone application. This application has been designed to record both objective (noise samples) and subjective (opinions, feelings) data. The application has been open to be used freely by anyone and has been widely employed worldwide. In addition, several test cases have been organised in European countries. Based on the information submitted by users, an analysis of emerging awareness and learning is performed. The data show that changes in the way the environment is perceived after repeated usage of the application do appear. Specifically, users learn how to recognise different noise levels they are exposed to. Additionally, the subjective data collected indicate an increased user involvement in time and a categorisation effect between pleasant and less pleasant environments.

},
  author = {Becker, Martin and Caminiti, Saverio and Fiorella, Donato and Francis, Louise and Gravino, Pietro and Haklay, Mordechai (Muki) and Hotho, Andreas and Loreto, Vittorio and Mueller, Juergen and Ricchiuti, Ferdinando and Servedio, Vito D. P. and Sîrbu, Alina and Tria, Francesca},
  doi = {10.1371/journal.pone.0081638},
  interhash = {52652b4fe271d8be4b96b2f692fe9519},
  intrahash = {423a8aaa4eb317ee507143293205c76f},
  journal = {PLoS ONE},
  month = dec,
  number = 12,
  pages = {e81638},
  publisher = {Public Library of Science},
  title = {Awareness and Learning in Participatory Noise Sensing},
  url = {http://dx.doi.org/10.1371/journal.pone.0081638},
  volume = 8,
  year = 2013
}

@inproceedings{Stenneth:2011:TMD:2093973.2093982,
  abstract = {The transportation mode such as walking, cycling or on a train denotes an important characteristic of the mobile user's context. In this paper, we propose an approach to inferring a user's mode of transportation based on the GPS sensor on her mobile device and knowledge of the underlying transportation network. The transportation network information considered includes real time bus locations, spatial rail and spatial bus stop information. We identify and derive the relevant features related to transportation network information to improve classification effectiveness. This approach can achieve over 93.5\% accuracy for inferring various transportation modes including: car, bus, aboveground train, walking, bike, and stationary. Our approach improves the accuracy of detection by 17\% in comparison with the GPS only approach, and 9\% in comparison with GPS with GIS models. The proposed approach is the first to distinguish between motorized transportation modes such as bus, car and aboveground train with such high accuracy. Additionally, if a user is travelling by bus, we provide further information about which particular bus the user is riding. 
Five different inference models including Bayesian Net, Decision Tree, Random Forest, Naïve Bayesian and Multilayer Perceptron, are tested in the experiments. The final classification system is deployed and available to the public.},
  acmid = {2093982},
  address = {New York, NY, USA},
  author = {Stenneth, Leon and Wolfson, Ouri and Yu, Philip S. and Xu, Bo},
  booktitle = {Proceedings of the 19th ACM SIGSPATIAL International Conference on Advances in Geographic Information Systems},
  doi = {10.1145/2093973.2093982},
  interhash = {07950385ca6bb9138db4f20bb3dd7698},
  intrahash = {6eff579bee29983fbf72403faa9b04ae},
  isbn = {978-1-4503-1031-4},
  location = {Chicago, Illinois},
  numpages = {10},
  pages = {54--63},
  publisher = {ACM},
  series = {GIS '11},
  title = {Transportation Mode Detection Using Mobile Phones and GIS Information},
  url = {http://doi.acm.org/10.1145/2093973.2093982},
  year = 2011
}

@article{fu2010using,
  abstract = {The most popular method for judging the impact of biomedical articles is citation count which is the number of citations received. The most significant limitation of citation count is that it cannot evaluate articles at the time of publication since citations accumulate over time. This work presents computer models that accurately predict citation counts of biomedical publications within a deep horizon of 10 years using only predictive information available at publication time. Our experiments show that it is indeed feasible to accurately predict future citation counts with a mixture of content-based and bibliometric features using machine learning methods. The models pave the way for practical prediction of the long-term impact of publication, and their statistical analysis provides greater insight into citation behavior.},
  affiliation = {Center for Health Informatics and Bioinformatics, New York University Medical Center, 333 E. 38th St, 6th Floor, New York, NY 10016, USA},
  author = {Fu, Lawrence D. 
and Aliferis, Constantin F.},
  doi = {10.1007/s11192-010-0160-5},
  interhash = {5502184494caab8c56056b7a9d92cb15},
  intrahash = {e45088bdacbda5a5e8e6f293dcbca995},
  issn = {0138-9130},
  journal = {Scientometrics},
  keyword = {Computer Science},
  number = 1,
  pages = {257--270},
  publisher = {Akadémiai Kiadó, co-published with Springer Science+Business Media B.V., Formerly Kluwer Academic Publishers B.V.},
  title = {Using content-based and bibliometric features for machine learning models to predict citation counts in the biomedical literature},
  url = {http://dx.doi.org/10.1007/s11192-010-0160-5},
  volume = 85,
  year = 2010
}

@book{DBLP:books/crc/aggarwal2013,
  bibsource = {DBLP, http://dblp.uni-trier.de},
  editor = {Aggarwal, Charu C. and Reddy, Chandan K.},
  ee = {http://www.crcpress.com/product/isbn/9781466558212, http://www.charuaggarwal.net/clusterbook.pdf},
  interhash = {5f150f838457faaa3805b0ed034c845f},
  intrahash = {7f1541e5800e6c36c67dd6bc0ef64ba7},
  isbn = {978-1-46-655821-2},
  publisher = {CRC Press},
  title = {Data Clustering: Algorithms and Applications},
  url = {http://www.charuaggarwal.net/clusterbook.pdf},
  year = 2014
}

@misc{lan2013joint,
  abstract = {Modern machine learning methods are critical to the development of large-scale personalized learning systems that cater directly to the needs of individual learners. The recently developed SPARse Factor Analysis (SPARFA) framework provides a new statistical model and algorithms for machine learning-based learning analytics, which estimate a learner's knowledge of the latent concepts underlying a domain, and content analytics, which estimate the relationships among a collection of questions and the latent concepts. SPARFA estimates these quantities given only the binary-valued graded responses to a collection of questions. In order to better interpret the estimated latent concepts, SPARFA relies on a post-processing step that utilizes user-defined tags (e.g., topics or keywords) available for each question. 
In this paper, we relax the need for user-defined tags by extending SPARFA to jointly process both graded learner responses and the text of each question and its associated answer(s) or other feedback. Our purely data-driven approach (i) enhances the interpretability of the estimated latent concepts without the need of explicitly generating a set of tags or performing a post-processing step, (ii) improves the prediction performance of SPARFA, and (iii) scales to large test/assessments where human annotation would prove burdensome. We demonstrate the efficacy of the proposed approach on two real educational datasets.},
  archiveprefix = {arXiv},
  author = {Lan, Andrew S. and Studer, Christoph and Waters, Andrew E. and Baraniuk, Richard G.},
  eprint = {1305.1956},
  interhash = {911707523671c994e5c3fe63c3df5c4a},
  intrahash = {2a8df43258181ed85e5d43b489fd45fb},
  title = {Joint Topic Modeling and Factor Analysis of Textual Information and Graded Response Data},
  url = {http://arxiv.org/abs/1305.1956},
  year = 2013
}

@article{landia2013deeper,
  abstract = {The information contained in social tagging systems is often modelled as a graph of connections between users, items and tags. Recommendation algorithms such as FolkRank, have the potential to leverage complex relationships in the data, corresponding to multiple hops in the graph. We present an in-depth analysis and evaluation of graph models for social tagging data and propose novel adaptations and extensions of FolkRank to improve tag recommendations. We highlight implicit assumptions made by the widely used folksonomy model, and propose an alternative and more accurate graph-representation of the data. Our extensions of FolkRank address the new item problem by incorporating content data into the algorithm, and significantly improve prediction results on unpruned datasets. 
Our adaptations address issues in the iterative weight spreading calculation that potentially hinder FolkRank's ability to leverage the deep graph as an information source. Moreover, we evaluate the benefit of considering each deeper level of the graph, and present important insights regarding the characteristics of social tagging data in general. Our results suggest that the base assumption made by conventional weight propagation methods, that closeness in the graph always implies a positive relationship, does not hold for the social tagging domain.}, author = {Landia, Nikolas and Doerfel, Stephan and Jäschke, Robert and Anand, Sarabjot Singh and Hotho, Andreas and Griffiths, Nathan}, interhash = {e8095b13630452ce3ecbae582f32f4bc}, intrahash = {e585a92994be476480545eb62d741642}, journal = {cs.IR}, title = {Deeper Into the Folksonomy Graph: FolkRank Adaptations and Extensions for Improved Tag Recommendations}, url = {http://arxiv.org/abs/1310.1498}, volume = {1310.1498}, year = 2013 } @incollection{MASH:13, address = {Heidelberg, Germany}, author = {Mitzlaff, Folke and Atzmueller, Martin and Stumme, Gerd and Hotho, Andreas}, booktitle = {Complex Networks IV}, doi = {10.1007/978-3-642-36844-8_2}, editor = {Ghoshal, Gourab and Poncela-Casasnovas, Julia and Tolksdorf, Robert}, interhash = {bf333426bb7af5f01bf0c465c1bfe1fc}, intrahash = {0a35f1ed66fcd342a6a44d70c63fb735}, optisbn = {978-3-642-36843-1}, opturl = {http://dx.doi.org/10.1007/978-3-642-36844-8_2}, publisher = {Springer Verlag}, series = {Studies in Computational Intelligence}, title = {{Semantics of User Interaction in Social Media}}, volume = 476, year = 2013 } @article{piatkowski2013spatiotemporal, author = {Piatkowski, Nico and Lee, Sangkyun and Morik, Katharina}, doi = {10.1007/s10994-013-5399-7}, interhash = {314e29a1c444118b8a4e8d2ba7ab6336}, intrahash = {eed8d4fcd9cfc30c01c1bf72e8e9cdbb}, issn = {0885-6125}, journal = {Machine Learning}, language = {English}, number = 1, pages = {115-139}, publisher 
= {Springer US}, title = {Spatio-temporal random fields: compressible representation and distributed estimation}, url = {http://dx.doi.org/10.1007/s10994-013-5399-7}, volume = 93, year = 2013 } @article{steenweg2010publikationsmanagement, abstract = {An den Hochschulen kommt der Förderung einer zeitgemäßen Publikationsumgebung steigende Bedeutung zu. Die Interessenlage ist vielschichtig. Autoren möchten eine komfortable Arbeitsumgebung, die Hochschulpräsidien benötigen Forschungsinformationen und die Bibliotheken wollen passende Informationsinfrastrukturen bereitzustellen. An der Universität Kassel wurde ausgehend vom Bedürfnis des wissenschaftlichen Autors in einem Pilotprojekt (PUMA) versucht, diese Interessen zu einem Publikationsmanagement zu vereinbaren. Für den Autor wird in PUMA bei deutlich geringerem Einsatz ein erheblicher Mehrwert dadurch generiert, dass bei nur einmaligen Anmelden mit dem Bibliotheks-Account ein Social-Bookmarking-System (BibSonomy) zur Verfügung steht, automatisiert Informationen an den Forschungsbericht weitergegeben, Metadaten und Dateien in Repositorien gestellt und Schriftenverzeichnisse (Curriculum Vitae) für Homepages etc. erstellt werden können.}, author = {Steenweg, Helge}, interhash = {f7b1d913b85a3d4b5ae1256c77ffa9e5}, intrahash = {bca65ebab3f638fae16a46620c4fb08a}, journal = {ABI-Technik}, number = 2, pages = {130-138.}, title = {Publikationsmanagement - eine wichtige zukünftige Aufgabe an Hochschulen. Wie sind Forschungsbericht, Institutional Repository und die Interessen des wissenschaftlichen Autors vereinbar? - Das Projekt PUMA}, volume = 30, year = 2010 } @incollection{pubman2012, abstract = {Universität und wissenschaftliches Publizieren gehören zusammen. Dabei kommt dem Publikationsmanagement an Hochschulen immer mehr an Bedeutung zu. Die Interessenlage ist vielschichtig. 
Autoren wünschen komfortable Arbeitsumgebungen, die Hochschulpräsidien benötigen Forschungsinformationen, und Bibliotheken stellen mit Reposi-torien Infrastrukturen zur Verfügung. Neue Trends im Bibliothekswesen bringen weitreichende Veränderungen technologischer und organisatorischer Art. Ausgehend von den Bedürfnissen von Autoren wurde an der Universität von Kassel ein Projekt namens PUMA aufgesetzt, um die verschiedenen Interessen in einer modernen Umgebung zusam-menzuführen. Innerhalb von PUMA finden Autoren neben einem Social-Bookmarking- und Bibliographie-System eine automatische Weitergabe ihrer Metadaten an Forschungs-informationssysteme, Repositorien und E-Learning-Systeme. }, author = {Steenweg, Helge}, booktitle = {Die neue Bibliothek - Anspruch und Wirklichkeit: 31. Österreichischer Bibliothekartag, Innsbruck, 18. - 21.10.2011}, editor = {Niedermair, Klaus and et.al.}, interhash = {41823a1503f115642d8fba7d7b7d63ff}, intrahash = {a6faacc3dd119efa53e0ef088da781ec}, pages = {89 - 95.}, title = {Publikationsmanagement an Hochschulen. Wie erreiche ich mehr durch weniger? }, year = 2012 } @inproceedings{Halle:2010, author = {Benz, Dominik and Hotho, Andreas and Jäschke, Robert and Stumme, Gerd and Halle, Axel and Lima-Gerlach, Angela Sanches and Steenweg, Helge and Stefani, Sven}, booktitle = {Proceedings of the 14. European Conference on Research and Advanced Technology for Digital Libraries }, interhash = {6769f676a73338ca4a431d47f2f5d3ff}, intrahash = {a7a715d741c77bc59d61919cde3e29f9}, pages = {417-420.}, title = {Academic Publication Management with PUMA - collect, organize and share publications}, url = {http://www.springerlink.com/content/73128285273l43mp/}, year = 2010 }