@article{becker2013human, abstract = {Anonymous location data from cellular phone networks sheds light on how people move around on a large scale.}, acmid = {2398375}, address = {New York, NY, USA}, author = {Becker, Richard and C\'{a}ceres, Ram\'{o}n and Hanson, Karrie and Isaacman, Sibren and Loh, Ji Meng and Martonosi, Margaret and Rowland, James and Urbanek, Simon and Varshavsky, Alexander and Volinsky, Chris}, doi = {10.1145/2398356.2398375}, interhash = {a12186255089937b97b17bd46c1dc689}, intrahash = {98da3ee109ea87bdb735cbda1fd202a7}, issn = {0001-0782}, issue_date = {January 2013}, journal = {Communications of the ACM}, month = jan, number = 1, numpages = {9}, pages = {74--82}, publisher = {ACM}, title = {Human mobility characterization from cellular network data}, url = {http://doi.acm.org/10.1145/2398356.2398375}, volume = 56, year = 2013 } @phdthesis{leidner2007toponym, abstract = {Background. In the area of Geographic Information Systems (GIS), a shared discipline between informatics and geography, the term geo-parsing is used to describe the process of identifying names in text, which in computational linguistics is known as named entity recognition and classification (NERC). The term geo-coding is used for the task of mapping from implicitly geo-referenced datasets (such as structured address records) to explicitly geo-referenced representations (e.g., using latitude and longitude). However, present-day GIS systems provide no automatic geo-coding functionality for unstructured text. In Information Extraction (IE), processing of named entities in text has traditionally been seen as a two-step process comprising a flat text span recognition sub-task and an atomic classification sub-task; relating the text span to a model of the world has been ignored by evaluations such as MUC or ACE (Chinchor (1998); U.S. NIST (2003)). 
However, spatial and temporal expressions refer to events in space-time, and the grounding of events is a precondition for accurate reasoning. Thus, automatic grounding can improve many applications such as automatic map drawing (e.g. for choosing a focus) and question answering (e.g., for questions like How far is London from Edinburgh?, given a story in which both occur and can be resolved). Whereas temporal grounding has received considerable attention in the recent past (Mani and Wilson (2000); Setzer (2001)), robust spatial grounding has long been neglected. Concentrating on geographic names for populated places, I define the task of automatic Toponym Resolution (TR) as computing the mapping from occurrences of names for places as found in a text to a representation of the extensional semantics of the location referred to (its referent), such as a geographic latitude/longitude footprint. The task of mapping from names to locations is hard due to insufficient and noisy databases, and a large degree of ambiguity: common words need to be distinguished from proper names (geo/non-geo ambiguity), and the mapping between names and locations is ambiguous (London can refer to the capital of the UK or to London, Ontario, Canada, or to about forty other Londons on earth). In addition, names of places and the boundaries referred to change over time, and databases are incomplete. Objective. I investigate how referentially ambiguous spatial named entities can be grounded, or resolved, with respect to an extensional coordinate model robustly on open-domain news text. I begin by comparing the few algorithms proposed in the literature, and, comparing semiformal, reconstructed descriptions of them, I factor out a shared repertoire of linguistic heuristics (e.g. rules, patterns) and extra-linguistic knowledge sources (e.g. population sizes). I then investigate how to combine these sources of evidence to obtain a superior method. 
I also investigate the noise effect introduced by the named entity tagging step that toponym resolution relies on in a sequential system pipeline architecture. Scope. In this thesis, I investigate a present-day snapshot of terrestrial geography as represented in the gazetteer defined and, accordingly, a collection of present-day news text. I limit the investigation to populated places; geo-coding of artifact names (e.g. airports or bridges), compositional geographic descriptions (e.g. 40 miles SW of London, near Berlin), for instance, is not attempted. Historic change is a major factor affecting gazetteer construction and ultimately toponym resolution. However, this is beyond the scope of this thesis. Method. While a small number of previous attempts have been made to solve the toponym resolution problem, these were either not evaluated, or evaluation was done by manual inspection of system output instead of curating a reusable reference corpus. Since the relevant literature is scattered across several disciplines (GIS, digital libraries, information retrieval, natural language processing) and descriptions of algorithms are mostly given in informal prose, I attempt to systematically describe them and aim at a reconstruction in a uniform, semi-formal pseudo-code notation for easier re-implementation. A systematic comparison leads to an inventory of heuristics and other sources of evidence. In order to carry out a comparative evaluation procedure, an evaluation resource is required. Unfortunately, to date no gold standard has been curated in the research community. To this end, a reference gazetteer and an associated novel reference corpus with human-labeled referent annotation are created. These are subsequently used to benchmark a selection of the reconstructed algorithms and a novel re-combination of the heuristics catalogued in the inventory. 
I then compare the performance of the same TR algorithms under three different conditions, namely applying it to the (i) output of human named entity annotation, (ii) automatic annotation using an existing Maximum Entropy sequence tagging model, and (iii) a na{\"\i}ve toponym lookup procedure in a gazetteer. Evaluation. The algorithms implemented in this thesis are evaluated in an intrinsic or component evaluation. To this end, we define a task-specific matching criterion to be used with traditional Precision (P) and Recall (R) evaluation metrics. This matching criterion is lenient with respect to numerical gazetteer imprecision in situations where one toponym instance is marked up with different gazetteer entries in the gold standard and the test set, respectively, but where these refer to the same candidate referent, caused by multiple near-duplicate entries in the reference gazetteer. Main Contributions. The major contributions of this thesis are as follows: • A new reference corpus in which instances of location named entities have been manually annotated with spatial grounding information for populated places, and an associated reference gazetteer, from which the assigned candidate referents are chosen. This reference gazetteer provides numerical latitude/longitude coordinates (such as 51$^{\circ}$ 32' North, 0$^{\circ}$ 5' West) as well as hierarchical path descriptions (such as London > UK) with respect to a world wide-coverage, geographic taxonomy constructed by combining several large, but noisy gazetteers. This corpus contains news stories and comprises two sub-corpora, a subset of the REUTERS RCV1 news corpus used for the CoNLL shared task (Tjong Kim Sang and De Meulder (2003)), and a subset of the Fourth Message Understanding Contest (MUC-4; Chinchor (1995)), both available pre-annotated with gold-standard. 
This corpus will be made available as a reference evaluation resource; • a new method and implemented system to resolve toponyms that is capable of robustly processing unseen text (open-domain online newswire text) and grounding toponym instances in an extensional model using longitude and latitude coordinates and hierarchical path descriptions, using internal (textual) and external (gazetteer) evidence; • an empirical analysis of the relative utility of various heuristic biases and other sources of evidence with respect to the toponym resolution task when analysing free news genre text; • a comparison between a replicated method as described in the literature, which functions as a baseline, and a novel algorithm based on minimality heuristics; and • several exemplary prototypical applications to show how the resulting toponym resolution methods can be used to create visual surrogates for news stories, a geographic exploration tool for news browsing, geographically-aware document retrieval and to answer spatial questions (How far...?) in an open-domain question answering system. These applications only have demonstrative character, as a thorough quantitative, task-based (extrinsic) evaluation of the utility of automatic toponym resolution is beyond the scope of this thesis and left for future work. }, author = {Leidner, Jochen Lothar}, interhash = {4558afaf4c48986f34b04bf06169456e}, intrahash = {c5f99e5f0fc60d29fcf730b968a95e90}, school = {School of Informatics, University of Edinburgh}, title = {Toponym Resolution in Text: Annotation, Evaluation and Applications of Spatial Grounding of Place Names}, url = {http://www.era.lib.ed.ac.uk/bitstream/1842/1849/1/leidner-2007-phd.pdf}, year = 2007 } @inproceedings{garbin2005disambiguating, abstract = {This research is aimed at the problem of disambiguating toponyms (place names) in terms of a classification derived by merging information from two publicly available gazetteers. 
To establish the difficulty of the problem, we measured the degree of ambiguity, with respect to a gazetteer, for toponyms in news. We found that 67.82\% of the toponyms found in a corpus that were ambiguous in a gazetteer lacked a local discriminator in the text. Given the scarcity of human-annotated data, our method used unsupervised machine learning to develop disambiguation rules. Toponyms were automatically tagged with information about them found in a gazetteer. A toponym that was ambiguous in the gazetteer was automatically disambiguated based on preference heuristics. This automatically tagged data was used to train a machine learner, which disambiguated toponyms in a human-annotated news corpus at 78.5\% accuracy.}, acmid = {1220621}, address = {Stroudsburg, PA, USA}, author = {Garbin, Eric and Mani, Inderjeet}, booktitle = {Proceedings of the conference on Human Language Technology and Empirical Methods in Natural Language Processing}, doi = {10.3115/1220575.1220621}, interhash = {566910cb6e9745ee70da19d2ccafaffa}, intrahash = {de574cf3bff3a3748fcd9bd5a9a0f3d1}, location = {Vancouver, British Columbia, Canada}, numpages = {8}, pages = {363--370}, publisher = {Association for Computational Linguistics}, title = {Disambiguating toponyms in news}, url = {http://dx.doi.org/10.3115/1220575.1220621}, year = 2005 } @article{song2012video, abstract = {This paper considers the problem of web video geolocation: we hope to determine where on the Earth a web video was taken. By analyzing a 6.5-million geotagged web video dataset, we observe that there exist inherent geography intimacies between a video with its relevant videos (related videos and same-author videos). This social relationship supplies a direct and effective cue to locate the video to a particular region on the earth. Based on this observation, we propose an effective web video geolocation algorithm by propagating geotags among the web video social relationship graph. 
For the videos that have no geotagged relevant videos, we aim to collect those geotagged relevant images that are content similar with the video (share some visual or textual information with the video) as the cue to infer the location of the video. The experiments have demonstrated the effectiveness of both methods, with the geolocation accuracy much better than state-of-the-art approaches. Finally, an online web video geolocation system: Video2Location (V2L) is developed to provide public access to our algorithm.}, author = {Song, Yi-Cheng and Zhang, Yong-Dong and Cao, Juan and Xia, Tian and Liu, Wu and Li, Jin-Tao}, doi = {10.1109/TMM.2011.2172937}, interhash = {090791b9f4e0737f35e40af91c4475d2}, intrahash = {40d777e2e4a83e28c75a1c8ba0554153}, issn = {1520-9210}, journal = {IEEE Transactions on Multimedia}, month = apr, number = 2, pages = {456--470}, publisher = {IEEE}, title = {Web Video Geolocation by Geotagged Social Resources}, url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6054059}, volume = 14, year = 2012 } @inproceedings{fink2009geolocation, abstract = {Understanding the spatial distribution of people who author social media content is of growing interest for researchers and commerce. Blogging platforms depend on authors reporting their own location. However, not all authors report or reveal their location on their blog's home page. Automated geolocation strategies using IP address and domain name are not adequate for determining an author's location because most blogs are not self-hosted. In this paper we describe a method that uses the place name mentions in a blog to determine an author's location. We achieved an accuracy of 63\% on a collection of 844 blogs with known locations.}, author = {Fink, C. and Piatko, C. and Mayfield, J. and Chou, D. and Finin, T. 
and Martineau, J.}, booktitle = {Proceedings of the International Conference on Computational Science and Engineering}, doi = {10.1109/CSE.2009.584}, interhash = {59b768c08026047c20d472ff93a4d513}, intrahash = {70eddd59803db7efee4b8c840fe5a79b}, month = aug, pages = {1088--1092}, title = {The Geolocation of Web Logs from Textual Clues}, url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=5282996}, volume = 4, year = 2009 } @incollection{ireson2010toponym, abstract = {Increasingly user-generated content is being utilised as a source of information, however each individual piece of content tends to contain low levels of information. In addition, such information tends to be informal and imperfect in nature; containing imprecise, subjective, ambiguous expressions. However the content does not have to be interpreted in isolation as it is linked, either explicitly or implicitly, to a network of interrelated content; it may be grouped or tagged with similar content, comments may be added by other users or it may be related to other content posted at the same time or by the same author or members of the author’s social network. This paper generally examines how ambiguous concepts within user-generated content can be assigned a specific/formal meaning by considering the expanding context of the information, i.e. 
other information contained within directly or indirectly related content, and specifically considers the issue of toponym resolution of locations.}, address = {Berlin/Heidelberg}, affiliation = {University of Sheffield, UK}, author = {Ireson, Neil and Ciravegna, Fabio}, booktitle = {The Semantic Web - ISWC 2010}, doi = {10.1007/978-3-642-17746-0_24}, editor = {Patel-Schneider, Peter and Pan, Yue and Hitzler, Pascal and Mika, Peter and Zhang, Lei and Pan, Jeff and Horrocks, Ian and Glimm, Birte}, interhash = {fd064c5fb724a5a72a6a67d1f6a7f8df}, intrahash = {3d94c7d94945f6e4f37f4391829318e7}, isbn = {978-3-642-17745-3}, keyword = {Computer Science}, pages = {370--385}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {Toponym Resolution in Social Media}, url = {http://dx.doi.org/10.1007/978-3-642-17746-0_24}, volume = 6496, year = 2010 } @inproceedings{martins2008extracting, abstract = {Geo-temporal criteria are important for filtering, grouping and prioritizing information resources. This presents techniques for extracting semantic geo-temporal information from text, using simple text mining methods that leverage on a gazetteer. A prototype system, implementing the proposed methods and capable of displaying information over maps and timelines, is described. This prototype can take input in RSS, demonstrating the application to content from many different online sources. Experimental results demonstrate the efficiency and accuracy of the proposed approaches.}, author = {Martins, B. and Manguinhas, H. 
and Borbinha, J.}, booktitle = {Proceedings of the International Conference on Semantic Computing}, doi = {10.1109/ICSC.2008.86}, interhash = {d03fecb6b3261ffa0a5e11789b188883}, intrahash = {5a889bc7d9e81cb1d294cb83b767bf64}, month = aug, pages = {1--9}, publisher = {IEEE Computer Society}, title = {Extracting and Exploring the Geo-Temporal Semantics of Textual Resources}, url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=4597167}, year = 2008 } @inproceedings{tezuka2001webbased, abstract = {Dealing with prepositions such as "near", "between" and "in front of" is very important in geographic information systems (GISs). In most systems, real-world distances are used to handle these prepositions. One of the difficulties in processing these prepositions lies in the fact that their geographical range is distorted in people's cognitive maps. For example, the size of an area referred to by the preposition "near" gets narrowed when a more famous landmark exists right next to the base geographical object. This is because users are likely to choose the most famous landmark when referring to a certain position. Also, the area referred to by "between" is not a straight line; it curves along the most commonly used pathway between the base objects. The difference in the popularity of geographical objects is the main reason for causing such distortions in cognitive maps. Since there is a large amount of data on the World Wide Web, we believe that such conceptual distortion can be calculated by analyzing Web data. Popularity and co-occurrence rates are calculated through their frequency in Web resources. Inference rules are set to restrict the target of conceptual prepositions using GISs and information obtained from the Web}, author = {Tezuka, T. and Lee, Ryong and Kambayashi, Y. 
and Takakura, H.}, booktitle = {Proceedings of the Second International Conference on Web Information Systems Engineering}, doi = {10.1109/WISE.2001.996692}, interhash = {132a7e8b5e47313ce56c790188d4d384}, intrahash = {b5b4d65538c9253a2b43c6252521d4f4}, month = dec, pages = {14--21}, title = {Web-based inference rules for processing conceptual geographical relationships}, url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=996692&tag=1}, volume = 2, year = 2001 } @inproceedings{clough2004proposal, author = {Clough, Paul and Sanderson, Mark}, booktitle = {Proceedings of the Workshop on Geographic Information Retrieval}, interhash = {d6d904074f6bd0fa1cee9c418a140ea4}, intrahash = {b7da956af5ed967d695770694f6ad783}, month = jul, title = {A proposal for comparative evaluation of automatic annotation for geo-referenced documents}, url = {http://eprints.whiterose.ac.uk/4522/}, year = 2004 } @article{goodwin2008geographical, abstract = {Ordnance Survey, the national mapping agency of Great Britain, is investigating how semantic web technologies assist its role as a geographical information provider. A major part of this work involves the development of prototype products and datasets in RDF. This article discusses the production of an example dataset for the administrative geography of Great Britain, demonstrating the advantages of explicitly encoding topological relations between geographic entities over traditional spatial queries. 
We also outline how these data can be linked to other datasets on the web of linked data and some of the challenges that this raises.}, author = {Goodwin, John and Dolbear, Catherine and Hart, Glen}, doi = {10.1111/j.1467-9671.2008.01133.x}, interhash = {ea248d549690eceb8e7aa06ccb24e226}, intrahash = {08412bb4afca1e86d0cca0a8a083f2a2}, issn = {1467-9671}, journal = {Transactions in GIS}, pages = {19--30}, publisher = {Blackwell Publishing Ltd}, title = {Geographical Linked Data: The Administrative Geography of Great Britain on the Semantic Web}, url = {http://dx.doi.org/10.1111/j.1467-9671.2008.01133.x}, volume = 12, year = 2008 } @mastersthesis{flohr2011extraktion, abstract = {Informationen so aufzubereiten, dass sie für eine bestimmte Situation nützlich sind, ist eine große Herausforderung. In solchen Situationen soll ein Benutzer, wenn er sich an einem fremden Ort befindet, mit Hilfe des Android Smartphone interessante und wissenswerte Informationen anzeigen lassen. Um dies bewerkstelligen zu können, muss es eine georeferenzierte Informationsquelle geben. Außerdem muss ein Konzept vorhanden sein, um diese Daten zu sammeln und so aufzubereiten, dass der Benutzer diese auch nützlich findet. Es muss eine Visualisierung dieser Daten geben, da der Platz zur Anzeige auf Smartphones sehr begrenzt ist. Als georeferenzierte Informationsquelle wird die Online-Enzyklopädie Wikipedia genutzt, diese ist frei zugänglich und auch sehr umfassend. In dieser Arbeit wird das Konzept zur Sammlung und Aufbereitung von relevanten Daten behandelt. Zur Informationsvisualisierung wird die Methode der Schlagwortwolke (engl. Tag-Cloud) verwendet. It is a major challenge to prepare useful information for a particular situation. In this situation an Android smartphone user wants to display interesting and important facts about an unknown place. To manage this task existence of a geo-referenced source of information has to be ensured. 
In order to collect and prepare this data a creation of concept is needed. Due to limited display space, it is necessary to construct a suitable visualization of this data. Wikipedia is used as a geo-referenced information resource, because it has open-access and it offers global geo-referenced information. This thesis covers the concept of collecting and preparing relevant data. To visualize information a tag cloud is used. }, author = {Flohr, Oliver}, interhash = {5d1f4da4964062ed6598fe8d8be8b591}, intrahash = {a28959724af1907e7fc67a68e648c14c}, month = aug, school = {Gottfried Wilhelm Leibniz Universität Hannover}, title = {Extraktion und Visualisierung ortsbezogener Informationen mit Tag-Clouds}, type = {bachelor thesis}, url = {http://www.se.uni-hannover.de/pub/File/pdfpapers/Flohr2011a.pdf}, year = 2011 } @book{lynch1992image, author = {Lynch, Kevin}, interhash = {7e2c57af8a2ba9fde0cfa4f401cdc72f}, intrahash = {2e9cc23aad987c883011884d6375dc4d}, isbn = {9780262620017}, publisher = {MIT Press}, title = {The image of the city}, url = {http://books.google.de/books?id=\_phRPWsSpAgC}, year = 1992 } @book{nold2009emotional, abstract = {Emotional Cartography is a collection of essays from artists, designers, psychogeographers, cultural researchers, futurologists and neuroscientists, brought together by Christian Nold, to explore the political, social and cultural implications of visualising intimate biometric data and emotional experiences using technology.}, author = {{Raqs Media Collective} and van de Drift, Marcel and Davis, Stephen Boyd and van Kranenburg, Rob and Hope, Sophie and Stafford, Tom}, editor = {Nold, Christian}, interhash = {8c669a3071f62b0ffe48214962e86443}, intrahash = {4610971f7f16acebe542ffe89390992e}, isbn = {978-0-9557623-1-4}, title = {Emotional Cartography - Technologies of the Self}, url = {http://emotionalcartography.net/}, year = 2009 } @book{haklay2010interacting, address = {Chichester, West Sussex, UK}, author = {Haklay, Mordechai}, 
interhash = {ee82b2b82e055fe1fa57c11ec1315122}, intrahash = {abd409ed5340f6563d5d82146c3e1990}, isbn = {9780470998243 0470998245}, publisher = {John Wiley}, refid = {466334086}, title = {Interacting with geospatial technologies}, url = {http://www.amazon.co.uk/gp/product/0470998245/}, year = 2010 } @inproceedings{rattenbury2007towards, abstract = {We describe an approach for extracting semantics of tags, unstructured text-labels assigned to resources on the Web, based on each tag's usage patterns. In particular, we focus on the problem of extracting place and event semantics for tags that are assigned to photos on Flickr, a popular photo sharing website that supports time and location (latitude/longitude) metadata. We analyze two methods inspired by well-known burst-analysis techniques and one novel method: Scale-structure Identification. We evaluate the methods on a subset of Flickr data, and show that our Scale-structure Identification method outperforms the existing techniques. The approach and methods described in this work can be used in other domains such as geo-annotated web pages, where text terms can be extracted and associated with usage patterns.}, address = {New York, NY, USA}, author = {Rattenbury, Tye and Good, Nathaniel and Naaman, Mor}, booktitle = {SIGIR '07: Proceedings of the 30th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval}, doi = {10.1145/1277741.1277762}, interhash = {8b02d2b3fdbb97c3db6e3b23079a56e5}, intrahash = {bf6f73d2ef74ca6f1d355fb5688b673c}, isbn = {978-1-59593-597-7}, pages = {103--110}, publisher = {ACM Press}, title = {Towards automatic extraction of event and place semantics from flickr tags}, url = {http://dx.doi.org/10.1145/1277741.1277762}, year = 2007 } @misc{maue2009recommending, author = {Maué, Patrick and Keßler, Carsten}, howpublished = {submitted for publication}, interhash = {21b16cea0d6e1f1812a9ce0f10d90fac}, intrahash = {89a962ddee414305418e2ac03b1e9a42}, title = 
{Recommending Semantic Annotations for Geographic Information}, url = {http://musil.uni-muenster.de/wp-content/uploads/recommending.pdf}, year = 2009 } @article{michael2007citizens, abstract = {In recent months there has been an explosion of interest in using the Web to create, assemble, and disseminate geographic information provided voluntarily by individuals. Sites such as Wikimapia and OpenStreetMap are empowering citizens to create a global patchwork of geographic information, while Google Earth and other virtual globes are encouraging volunteers to develop interesting applications using their own data. I review this phenomenon, and examine associated issues: what drives people to do this, how accurate are the results, will they threaten individual privacy, and how can they augment more conventional sources? I compare this new phenomenon to more traditional citizen science and the role of the amateur in geographic observation.}, author = {Goodchild, Michael}, doi = {10.1007/s10708-007-9111-y}, interhash = {5b22a56fff9318036a29a21923c59678}, intrahash = {c35939fb7621bf75eebd4ed62febeb39}, issn = {0343-2521}, journal = {GeoJournal}, month = aug, number = 4, pages = {211--221}, title = {Citizens as sensors: the world of volunteered geography}, url = {http://www.springerlink.com/content/h013jk125081j628/}, volume = 69, year = 2007 } @techreport{ieKey, author = {Groh, Georg}, institution = {TU München}, interhash = {7507ea3706a7cc5aaae769370f0671b1}, intrahash = {87835f6fce05f443a4956673662734d2}, month = mar, title = {Ortsbezug in kontext-sensitiven Diensten für mobile Communities}, type = {8. Münchner Fortbildungsseminar Geoinformationssysteme}, year = 2003 }