% Bibliography on distributional / vector-space semantics and social tagging.
% Layout: one field per line; bibliographic fields first, then identifiers,
% abstract, and finally export metadata (interhash, intrahash, file, groups,
% timestamp, username), which standard styles ignore.

% arXiv preprint: the identifier lives in eprint/archiveprefix instead of a
% free-text note so that eprint-aware styles can render and link it.
@misc{turney2010frequency,
  author        = {Turney, Peter D. and Pantel, Patrick},
  title         = {From Frequency to Meaning: Vector Space Models of Semantics},
  year          = {2010},
  eprint        = {1003.1141},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1003.1141},
  abstract      = {Computers understand very little of the meaning of human
                   language. This profoundly limits our ability to give
                   instructions to computers, the ability of computers to
                   explain their actions to us, and the ability of computers
                   to analyse and process text. Vector space models (VSMs) of
                   semantics are beginning to address these limits. This paper
                   surveys the use of VSMs for semantic processing of text. We
                   organize the literature on VSMs according to the structure
                   of the matrix in a VSM. There are currently three broad
                   classes of VSMs, based on term-document, word-context, and
                   pair-pattern matrices, yielding three classes of
                   applications. We survey a broad range of applications in
                   these three categories and we take a detailed look at a
                   specific open source project in each category. Our goal in
                   this survey is to show the breadth of applications of VSMs
                   for semantics, to provide a new perspective on VSMs for
                   those who are already familiar with the area, and to
                   provide pointers into the literature for those who are less
                   familiar with the field.},
  interhash     = {397ead0766aba687b471395729a263d1},
  intrahash     = {917bb6b225b8c844b1a15b6577b3845b},
}

% address is the publisher's city; location is the conference venue.
@inproceedings{suchanek2008social,
  author    = {Suchanek, Fabian M. and Vojnovic, Milan and Gunawardena, Dinan},
  title     = {Social tags: meaning and suggestions},
  booktitle = {Proceeding of the 17th ACM conference on Information and knowledge management},
  series    = {CIKM '08},
  pages     = {223--232},
  numpages  = {10},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Napa Valley, California, USA},
  year      = {2008},
  isbn      = {978-1-59593-991-3},
  doi       = {10.1145/1458082.1458114},
  url       = {http://doi.acm.org/10.1145/1458082.1458114},
  acmid     = {1458114},
  abstract  = {This paper aims to quantify two common assumptions about social
               tagging: (1) that tags are ``meaningful'' and (2) that the
               tagging process is influenced by tag suggestions. For (1), we
               analyze the semantic properties of tags and the relationship
               between the tags and the content of the tagged page. Our
               analysis is based on a corpus of search keywords, contents,
               titles, and tags applied to several thousand popular Web pages.
               Among other results, we find that the more popular tags of a
               page tend to be the more meaningful ones. For (2), we develop a
               model of how the influence of tag suggestions can be measured.
               From a user study with over 4,000 participants, we conclude
               that roughly one third of the tag applications may be induced
               by the suggestions. Our results would be of interest for
               designers of social tagging systems and are a step towards
               understanding how to best leverage social tags for applications
               such as search and information extraction.},
  interhash = {1bca5a66a6a562258e0c0357545fed34},
  intrahash = {ff31cf8541004adc7cd712ed715706b3},
}

% This key used to be defined twice (once as a conference paper, once as a
% journal article, with identical hashes); a repeated key is a BibTeX error.
% The two records are merged here, and the workshop name is stored in
% booktitle, which is the required venue field for this entry type.
@inproceedings{bullinaria2008semantic,
  author    = {Bullinaria, J. A.},
  title     = {Semantic Categorization Using Simple Word Co-occurrence Statistics},
  booktitle = {ESSLLI Workshop on Distributional Lexical Semantics},
  year      = {2008},
  interhash = {cdb7b1ff0e89f61f84e2c15a0e46c221},
  intrahash = {efae206c0f89363a3273a8d57c87eff5},
  file      = {bullinaria2008semantic.pdf:bullinaria2008semantic.pdf:PDF},
  groups    = {public},
  timestamp = {2011-01-28 09:53:43},
  username  = {dbenz},
}

% NOTE(review): volume and the closing page were added from the published
% article record; the original entry only had the first page (510).
@article{bullinaria2007extracting,
  author    = {Bullinaria, J. A. and Levy, J. P.},
  title     = {Extracting semantic representations from word co-occurrence statistics: A computational study},
  journal   = {Behavior Research Methods},
  volume    = {39},
  number    = {3},
  pages     = {510--526},
  publisher = {Psychonomic Society Publications},
  year      = {2007},
  interhash = {913a28789f70f22c7f7b927ffe936116},
  intrahash = {9697b015d4d27fabe5f7cf1847c45157},
}

% A dissertation: the institution belongs in school, and type carries the
% wording that was previously packed into the journal field.
@phdthesis{evert2004statistics,
  author    = {Evert, S.},
  title     = {The statistics of word cooccurrences: word pairs and collocations},
  school    = {Institut f{\"u}r maschinelle Sprachverarbeitung, Universit{\"a}t Stuttgart},
  type      = {Unpublished doctoral dissertation},
  year      = {2004},
  url       = {http://scholar.google.de/scholar.bib?q=info:6tx-Vnyw1ooJ:scholar.google.com/&output=citation&hl=de&as_sdt=2000&ct=citation&cd=0},
  interhash = {1e2f18c86df238b8759858ec68911893},
  intrahash = {660ddcd65df845d9d3224430c96d2b7b},
}

% NOTE(review): publisher/address were split out of the library-catalogue
% string "Austin, Tex.: The Society, c1984-c2004." -- confirm the society name.
@article{lund1996producing,
  author    = {Lund, K. and Burgess, C.},
  title     = {Producing high-dimensional semantic spaces from lexical co-occurrence},
  journal   = {Behavior Research Methods, Instruments, \& Computers},
  volume    = {28},
  number    = {2},
  pages     = {203--208},
  publisher = {Psychonomic Society},
  address   = {Austin, TX},
  year      = {1996},
  url       = {http://scholar.google.de/scholar.bib?q=info:BfG544ylGnkJ:scholar.google.com/&output=citation&hl=de&as_sdt=2000&as_vis=1&ct=citation&cd=0},
  interhash = {77e182745556c0a24e0e67a22652a66e},
  intrahash = {70b626b5e3a8225516679678fb26a830},
}

% NOTE(review): the crossref target conf/sigir/2008 is not defined in this
% part of the file; classic BibTeX needs it to appear after this entry.
% doi is the bare identifier taken from the resolver URL in ee.
@inproceedings{hu2008enhancing,
  author    = {Hu, Jian and Fang, Lujun and Cao, Yang and Zeng, Hua-Jun and Li, Hua and Yang, Qiang and Chen, Zheng},
  title     = {Enhancing text clustering by leveraging {Wikipedia} semantics},
  booktitle = {SIGIR},
  editor    = {Myaeng, Sung-Hyon and Oard, Douglas W. and Sebastiani, Fabrizio and Chua, Tat-Seng and Leong, Mun-Kew},
  pages     = {179--186},
  publisher = {ACM},
  year      = {2008},
  isbn      = {978-1-60558-164-4},
  doi       = {10.1145/1390334.1390367},
  ee        = {http://doi.acm.org/10.1145/1390334.1390367},
  url       = {http://dblp.uni-trier.de/db/conf/sigir/sigir2008.html#HuFCZLYC08},
  crossref  = {conf/sigir/2008},
  interhash = {0a2878165034dcdfacb9045608ec482a},
  intrahash = {76f863a12c0b983ec67682deaec1ada4},
}

% The title stays fully braced on purpose: it is German, and sentence-casing
% styles would otherwise lowercase its nouns.
@book{helbig2008wissensverarbeitung,
  author    = {Helbig, Hermann},
  title     = {{Wissensverarbeitung und die Semantik der nat{\"u}rlichen Sprache: Wissensrepr{\"a}sentation mit MultiNet}},
  edition   = {2.},
  publisher = {Springer},
  address   = {Berlin},
  year      = {2008},
  isbn      = {3-540-76276-0},
  doi       = {10.1007/978-3-540-76278-2},
  abstract  = {Das Buch gibt eine umfassende Darstellung einer Methodik zur
               Interpretation und Bedeutungsrepr{\"a}sentation
               nat{\"u}rlichsprachlicher Ausdr{\"u}cke. Diese Methodik der
               Mehrschichtigen Erweiterten Semantischen Netze (MultiNet) ist
               sowohl f{\"u}r theoretische Untersuchungen als auch f{\"u}r die
               automatische Verarbeitung nat{\"u}rlicher Sprache auf dem
               Rechner geeignet. Die vorgestellten Ergebnisse sind eingebettet
               in ein System von Software-Werkzeugen, die eine praktische
               Nutzung der MultiNet-Darstellungsmittel als Formalismus zur
               Bedeutungsrepr{\"a}sentation sichern. Hierzu geh{\"o}ren: eine
               Werkbank f{\"u}r den Wissensingenieur, ein
               {\"U}bersetzungssystem zur automatischen Gewinnung von
               Bedeutungsdarstellungen nat{\"u}rlichsprachlicher S{\"a}tze und
               eine Werkbank f{\"u}r den Computerlexikographen.},
  file      = {Amazon Search inside:http\://www.amazon.de/gp/reader/3540762760/:URL},
  interhash = {f7c09fb5257be21200f6a9622c5d301c},
  intrahash = {6eff05ef4aa01e934aa45df7a7ad3154},
}

% month uses the predefined three-letter macro so styles can localise it.
@inproceedings{wagner2010wisdom,
  author    = {Wagner, C. and Strohmaier, M.},
  title     = {The Wisdom in Tweetonomies: Acquiring Latent Conceptual Structures from Social Awareness Streams},
  booktitle = {Proc. of the Semantic Search 2010 Workshop (SemSearch2010)},
  location  = {Raleigh, NC, USA},
  month     = apr,
  year      = {2010},
  url       = {http://mstrohm.wordpress.com/2010/04/17/on-taxonomies-folksonomies-and-tweetonomies/},
  abstract  = {Although one might argue that little wisdom can be conveyed in
               messages of 140 characters or less, this paper sets out to
               explore whether the aggregation of messages in social awareness
               streams, such as Twitter, conveys meaningful information about
               a given domain. As a research community, we know little about
               the structural and semantic properties of such streams, and how
               they can be analyzed, characterized and used. This paper
               introduces a network-theoretic model of social awareness
               stream, a so-called ``tweetonomy'', together with a set of
               stream-based measures that allow researchers to systematically
               define and compare different stream aggregations. We apply the
               model and measures to a dataset acquired from Twitter to study
               emerging semantics in selected streams. The network-theoretic
               model and the corresponding measures introduced in this paper
               are relevant for researchers interested in information
               retrieval and ontology learning from social awareness streams.
               Our empirical findings demonstrate that different social
               awareness stream aggregations exhibit interesting differences,
               making them amenable for different applications.},
  interhash = {02c222a4f9abd5964ea61af034769af4},
  intrahash = {2f96232a648d4fd1617c389d899f3d2b},
  file      = {wagner2010wisdom.pdf:wagner2010wisdom.pdf:PDF},
  groups    = {public},
  timestamp = {2010-04-19 08:03:47},
  username  = {dbenz},
}