% Rauber and Kaiser (2009): German-language journal article on web archiving
% and web archive mining. Nouns in the title are brace-protected so that
% sentence-casing bibliography styles keep their German capitalisation.
@article{rauber2009webarchivierung,
  abstract = {In den letzten Jahren haben Bibliotheken und Archive zunehmend die Aufgabe übernommen, neben konventionellen Publikationen auch Inhalte aus dem World Wide Web zu sammeln, um so diesen wertvollen Teil unseres kulturellen Erbes zu bewahren und wichtige Informationen langfristig verfügbar zu halten. Diese massiven Datensammlungen bieten faszinierende Möglichkeiten, rasch Zugriff auf wichtige Informationen zu bekommen, die im Live-Web bereits verloren gegangen sind. Sie sind eine unentbehrliche Quelle für Wissenschaftler, die in der Zukunft die gesellschaftliche und technologische Entwicklung unserer Zeit nachvollziehen wollen. Auf der anderen Seite stellt eine derartige Datensammlung aber einen völlig neuen Datenbestand dar, der nicht nur rechtliche, sondern auch zahlreiche ethische Fragen betreffend seine Nutzung aufwirft. Diese werden in dem Ausmaß zunehmen, in dem die technischen Möglichkeiten zur automatischen Analyse und Interpretation dieser Daten leistungsfähiger werden. Da sich die meisten Webarchivierungsinitiativen dieser Problematik bewusst sind, bleibt die Nutzung der Daten derzeit meist stark eingeschränkt, oder es wird eine Art von "Opt-Out"-Möglichkeit vorgesehen, wodurch Webseiteninhaber die Aufnahme ihrer Seiten in ein Webarchiv ausschließen können. Mit beiden Ansätzen können Webarchive ihr volles Nutzungspotenzial nicht ausschöpfen. Dieser Artikel beschreibt einleitend kurz die Technologien, die zur Sammlung von Webinhalten zu Archivierungszwecken verwendet werden. Er hinterfragt Annahmen, die die freie Verfügbarkeit der Daten und unterschiedliche Nutzungsarten betreffen. Darauf aufbauend identifiziert er eine Reihe von offenen Fragen, deren Lösung einen breiteren Zugriff und bessere Nutzung von Webarchiven erlauben könnte.},
  author = {Rauber, Andreas and Kaiser, Max},
  editor = {Knoll, Matthias and Meier, Andreas},
  interhash = {3b35b676a2817868d93481aeebfa4154},
  intrahash = {cdaef18169a7d8300cf54daf018a74cc},
  issn = {1436-3011},
  journal = {HMD Praxis der Wirtschaftsinformatik},
  month = aug,
  publisher = {dpunkt.verlag},
  title = {Webarchivierung und {Web Archive Mining}: {Notwendigkeit}, {Probleme} und {Lösungsansätze}},
  url = {http://hmd.dpunkt.de/268/03.php},
  volume = 268,
  year = 2009
}

% Song, Qu, Blumm and Barabási (2010): Science article on the limits of
% predictability in human mobility, based on anonymized mobile phone data.
% NOTE(review): the eprint field holds a reprint URL rather than an archive
% identifier; it is kept as exported.
@article{song2010limits,
  abstract = {A range of applications, from predicting the spread of human and electronic viruses to city planning and resource management in mobile communications, depend on our ability to foresee the whereabouts and mobility of individuals, raising a fundamental question: To what degree is human behavior predictable? Here we explore the limits of predictability in human dynamics by studying the mobility patterns of anonymized mobile phone users. By measuring the entropy of each individual's trajectory, we find a 93% potential predictability in user mobility across the whole user base. Despite the significant differences in the travel patterns, we find a remarkable lack of variability in predictability, which is largely independent of the distance users cover on a regular basis.},
  author = {Song, Chaoming and Qu, Zehui and Blumm, Nicholas and Barabási, Albert-László},
  doi = {10.1126/science.1177170},
  eprint = {http://www.sciencemag.org/cgi/reprint/327/5968/1018.pdf},
  interhash = {f2611a08bf6db54f86e884c05f3cb5fb},
  intrahash = {a89330f8eb32ce62b5f5c9a2b4909f25},
  journal = {Science},
  month = feb,
  number = 5968,
  pages = {1018--1021},
  title = {Limits of Predictability in Human Mobility},
  url = {http://www.sciencemag.org/cgi/content/abstract/327/5968/1018},
  volume = 327,
  year = 2010
}

% Zheleva and Getoor (2009): WWW '09 paper on inferring private attributes in
% social networks with mixed public and private profiles.
% address is the publisher's city; the conference city is stored in venue so
% that it cannot collide with address under biblatex's address/location alias.
@inproceedings{zheleva2009join,
  abstract = {In order to address privacy concerns, many social media websites allow users to hide their personal profiles from the public. In this work, we show how an adversary can exploit an online social network with a mixture of public and private user profiles to predict the private attributes of users. We map this problem to a relational classification problem and we propose practical models that use friendship and group membership information (which is often not hidden) to infer sensitive attributes. The key novel idea is that in addition to friendship links, groups can be carriers of significant information. We show that on several well-known social media sites, we can easily and accurately recover the information of private-profile users. To the best of our knowledge, this is the first work that uses link-based and group-based classification to study privacy implications in social networks with mixed public and private user profiles.},
  address = {New York, NY, USA},
  author = {Zheleva, Elena and Getoor, Lise},
  booktitle = {{WWW} '09: Proceedings of the 18th International Conference on World Wide Web},
  doi = {10.1145/1526709.1526781},
  interhash = {4726d0a13b0337998d6d0f54fc5c26e9},
  intrahash = {25e6c200ace070886f01d7d30957b504},
  isbn = {978-1-60558-487-4},
  month = apr,
  pages = {531--540},
  publisher = {ACM},
  title = {To Join or Not to Join: The Illusion of Privacy in Social Networks with Mixed Public and Private User Profiles},
  url = {http://portal.acm.org/citation.cfm?id=1526709.1526781&coll=GUIDE&dl=acm&type=series&idx=SERIES968&part=series&WantType=Proceedings&title=WWW},
  venue = {Madrid, Spain},
  year = 2009
}

% Narayanan and Shmatikov (2008): IEEE Symposium on Security and Privacy paper
% on de-anonymizing sparse datasets, applied to the Netflix Prize dataset.
@inproceedings{narayanan2008robust,
  abstract = {We present a new class of statistical de-anonymization attacks against high-dimensional micro-data, such as individual preferences, recommendations, transaction records and so on. Our techniques are robust to perturbation in the data and tolerate some mistakes in the adversary's background knowledge. We apply our de-anonymization methodology to the Netflix Prize dataset, which contains anonymous movie ratings of 500,000 subscribers of Netflix, the world's largest online movie rental service. We demonstrate that an adversary who knows only a little bit about an individual subscriber can easily identify this subscriber's record in the dataset. Using the Internet Movie Database as the source of background knowledge, we successfully identified the Netflix records of known users, uncovering their apparent political preferences and other potentially sensitive information.},
  author = {Narayanan, Arvind and Shmatikov, Vitaly},
  booktitle = {Proceedings of the 29th {IEEE} Symposium on Security and Privacy},
  doi = {10.1109/SP.2008.33},
  interhash = {77c86be6c4bf7fc51b7faecfe85479fe},
  intrahash = {2748ba4684dbe09120aee56c6a0a9de9},
  issn = {1081-6011},
  month = may,
  pages = {111--125},
  publisher = {IEEE Computer Society},
  title = {Robust De-anonymization of Large Sparse Datasets},
  url = {http://www.cs.utexas.edu/~shmat/shmat_oak08netflix.pdf},
  year = 2008
}