% Bibliography entries on semantics, folksonomies and distributional models.
% Layout: one entry per block, one field per line; bibliographic fields first,
% then identifiers, abstract, and tool-specific bookkeeping fields
% (interhash, intrahash, file, groups, username, timestamp).
% Non-ASCII letters are written as LaTeX escapes so the file also works with
% classic 8-bit BibTeX.

@misc{turney2010frequency,
  author = {Turney, Peter D. and Pantel, Patrick},
  title = {From Frequency to Meaning: Vector Space Models of Semantics},
  year = 2010,
  eprint = {1003.1141},
  archiveprefix = {arXiv},
  note = {arXiv:1003.1141},
  url = {http://arxiv.org/abs/1003.1141},
  abstract = {Computers understand very little of the meaning of human language. This profoundly limits our ability to give instructions to computers, the ability of computers to explain their actions to us, and the ability of computers to analyse and process text. Vector space models (VSMs) of semantics are beginning to address these limits. This paper surveys the use of VSMs for semantic processing of text. We organize the literature on VSMs according to the structure of the matrix in a VSM. There are currently three broad classes of VSMs, based on term-document, word-context, and pair-pattern matrices, yielding three classes of applications. We survey a broad range of applications in these three categories and we take a detailed look at a specific open source project in each category. Our goal in this survey is to show the breadth of applications of VSMs for semantics, to provide a new perspective on VSMs for those who are already familiar with the area, and to provide pointers into the literature for those who are less familiar with the field.},
  interhash = {397ead0766aba687b471395729a263d1},
  intrahash = {917bb6b225b8c844b1a15b6577b3845b}
}

@inproceedings{suchanek2008social,
  author = {Suchanek, Fabian M. and Vojnovic, Milan and Gunawardena, Dinan},
  title = {Social tags: meaning and suggestions},
  booktitle = {Proceeding of the 17th ACM conference on Information and knowledge management},
  series = {CIKM '08},
  pages = {223--232},
  numpages = {10},
  publisher = {ACM},
  address = {New York, NY, USA},
  location = {Napa Valley, California, USA},
  year = 2008,
  isbn = {978-1-59593-991-3},
  doi = {10.1145/1458082.1458114},
  url = {http://doi.acm.org/10.1145/1458082.1458114},
  acmid = {1458114},
  abstract = {This paper aims to quantify two common assumptions about social tagging: (1) that tags are "meaningful" and (2) that the tagging process is influenced by tag suggestions. For (1), we analyze the semantic properties of tags and the relationship between the tags and the content of the tagged page. Our analysis is based on a corpus of search keywords, contents, titles, and tags applied to several thousand popular Web pages. Among other results, we find that the more popular tags of a page tend to be the more meaningful ones. For (2), we develop a model of how the influence of tag suggestions can be measured. From a user study with over 4,000 participants, we conclude that roughly one third of the tag applications may be induced by the suggestions. Our results would be of interest for designers of social tagging systems and are a step towards understanding how to best leverage social tags for applications such as search and information extraction.},
  interhash = {1bca5a66a6a562258e0c0357545fed34},
  intrahash = {ff31cf8541004adc7cd712ed715706b3}
}

% The family name is the compound "Au Yeung"; the comma form keeps it intact.
@phdthesis{yeung2009from,
  author = {Au Yeung, Ching Man},
  title = {From User Behaviours to Collective Semantics},
  school = {University of Southampton},
  year = 2009,
  file = {yeung2009from.pdf:yeung2009from.pdf:PDF},
  groups = {public},
  interhash = {5d149e7d1b58b1452a166c9e8fcb1196},
  intrahash = {08831b7e19efd4e6b149fb4af180a5af},
  timestamp = {2011.07.29},
  username = {dbenz}
}

@article{levy2008learning,
  author = {Levy, M. and Sandler, M.},
  title = {Learning latent semantic models for music from social tags},
  journal = {Journal of New Music Research},
  volume = 37,
  number = 2,
  pages = {137--150},
  publisher = {Routledge, part of the Taylor \& Francis Group},
  year = 2008,
  file = {levy2008learning.pdf:levy2008learning.pdf:PDF},
  groups = {public},
  interhash = {82ca1eaa0983bf17582b4b02597f2a1d},
  intrahash = {0681ab4879e2378295f724eb73e7360c},
  username = {dbenz}
}

@phdthesis{abbasi2010discovering,
  author = {Abbasi, Rabeeh Ayaz},
  title = {Discovering and Exploiting Semantics in Folksonomies},
  school = {Universit{\"a}t Koblenz-Landau},
  year = 2010,
  file = {abbasi2010discovering.pdf:abbasi2010discovering.pdf:PDF},
  groups = {public},
  interhash = {f92b84fefbec67183c025eded23d18ec},
  intrahash = {52aceb02e936388a32e9ef9e1353ba93},
  timestamp = {2011.07.29},
  username = {dbenz}
}

@article{staab2002emergent,
  author = {Staab, S. and Santini, S. and Nack, F. and Steels, L. and Maedche, A.},
  title = {Emergent semantics},
  journal = {IEEE Intelligent Systems},
  volume = 17,
  number = 1,
  pages = {78--86},
  publisher = {IEEE},
  year = 2002,
  interhash = {4bdf6565939f3a3563deaff00c93615b},
  intrahash = {c8f3c61c114cd2aca75c08f05216a569}
}

% "De Troyer" and "De Santis" are compound surnames, so the particle stays
% on the family-name side of the comma.
@inproceedings{aberer2004emergent,
  author = {Aberer, Karl and Cudr{\'e}-Mauroux, Philippe and Ouksel, Aris M. and Catarci, Tiziana and Hacid, Mohand-Said and Illarramendi, Arantza and Kashyap, Vipul and Mecella, Massimo and Mena, Eduardo and Neuhold, Erich J. and De Troyer, Olga and Risse, Thomas and Scannapieco, Monica and Saltor, F{\`e}lix and De Santis, Luca and Spaccapietra, Stefano and Staab, Steffen and Studer, Rudi},
  title = {Emergent Semantics Principles and Issues},
  booktitle = {Proceedings of the 9th International Conference on Database Systems for Advanced Applications (DASFAA'04)},
  editor = {Lee, Yoon-Joon and Li, Jianzhong and Whang, Kyu-Young and Lee, Doheon},
  series = {Lecture Notes in Computer Science},
  volume = 2973,
  pages = {25--38},
  publisher = {Springer},
  year = 2004,
  isbn = {3-540-21047-4},
  interhash = {4a644b67e30bfb1c342413b139b46270},
  intrahash = {3dd280a5313df86cd5747e5bc91bc5c6}
}

@book{cudre-mauroux2008emergent,
  author = {Cudr{\'e}-Mauroux, Philippe},
  title = {Emergent Semantics (Computer and Communication Sciences)},
  edition = 1,
  publisher = {EPFL Press},
  year = 2008,
  isbn = {1420092278},
  asin = {1420092278},
  ean = {9781420092271},
  dewey = {620},
  url = {http://www.amazon.com/Emergent-Semantics-Computer-Communication-Sciences/dp/1420092278/ref=sr_1_1?ie=UTF8&qid=1310056948&sr=8-1},
  interhash = {888c26936598ddf7ebc070cfc3dba066},
  intrahash = {df3ecaa546c93b3735e51c6ae6080700}
}

% The proceedings title belongs in booktitle; the doi is the one embedded in url.
@inproceedings{aurnhammer2006augmenting,
  author = {Aurnhammer, Melanie and Hanappe, Peter and Steels, Luc},
  title = {Augmenting Navigation for Collaborative Tagging with Emergent Semantics},
  booktitle = {The Semantic Web - ISWC 2006},
  volume = 4273,
  pages = {58--71},
  year = 2006,
  doi = {10.1007/11926078_5},
  url = {http://dx.doi.org/10.1007/11926078_5},
  abstract = {We propose an approach that unifies browsing by tags and visual features for intuitive exploration of image databases. In contrast to traditional image retrieval approaches, we utilise tags provided by users on collaborative tagging sites, complemented by simple image analysis and classification. This allows us to find new relations between data elements. We introduce the concept of a navigation map, that describes links between users, tags, and data elements for the example of the collaborative tagging site Flickr. We show that introducing similarity search based on image features yields additional links on this map. These theoretical considerations are supported by examples provided by our system, using data and tags from real Flickr users.},
  file = {aurnhammer2006augmenting.pdf:aurnhammer2006augmenting.pdf:PDF},
  groups = {public},
  interhash = {a9d35e917da138f929b5d81f1dab4fd0},
  intrahash = {a286ce64106a503e135e7114365c77b2},
  timestamp = {2009-08-11 18:38:56},
  username = {dbenz}
}

@inproceedings{bullinaria2008semantic,
  author = {Bullinaria, J. A.},
  title = {Semantic Categorization Using Simple Word Co-occurrence Statistics},
  booktitle = {ESSLLI Workshop on Distributional Lexical Semantics},
  year = 2008,
  file = {bullinaria2008semantic.pdf:bullinaria2008semantic.pdf:PDF},
  groups = {public},
  interhash = {cdb7b1ff0e89f61f84e2c15a0e46c221},
  intrahash = {efae206c0f89363a3273a8d57c87eff5},
  timestamp = {2011-01-28 09:53:43},
  username = {dbenz}
}

% The proceedings title belongs in booktitle; the doi is the one embedded in url.
@inproceedings{zhou2008unsupervised,
  author = {Zhou, Mianwei and Bao, Shenghua and Wu, Xian and Yu, Yong},
  title = {An Unsupervised Model for Exploring Hierarchical Semantics from Social Annotations},
  booktitle = {The Semantic Web},
  pages = {680--693},
  year = 2008,
  doi = {10.1007/978-3-540-76298-0_49},
  url = {http://dx.doi.org/10.1007/978-3-540-76298-0_49},
  abstract = {This paper deals with the problem of exploring hierarchical semantics from social annotations. Recently, social annotation services have become more and more popular in Semantic Web. It allows users to arbitrarily annotate web resources, thus, largely lowers the barrier to cooperation. Furthermore, through providing abundant meta-data resources, social annotation might become a key to the development of Semantic Web. However, on the other hand, social annotation has its own apparent limitations, for instance, 1) ambiguity and synonym phenomena and 2) lack of hierarchical information. In this paper, we propose an unsupervised model to automatically derive hierarchical semantics from social annotations. Using a social bookmark service Del.icio.us as example, we demonstrate that the derived hierarchical semantics has the ability to compensate those shortcomings. We further apply our model on another data set from Flickr to testify our model's applicability on different environments. The experimental results demonstrate our model's efficiency.},
  file = {zhou2008unsupervised.pdf:zhou2008unsupervised.pdf:PDF},
  groups = {public},
  interhash = {e8397fd51d43531b91e81776c879f487},
  intrahash = {ee6da1cc1300cf4fb68fc58d5e2bb819},
  timestamp = {2009-09-24 23:27:32},
  username = {dbenz}
}

% Issue number 4 is taken from the doi (ijcl.13.4) and the url path.
@article{raysonecember2008from,
  author = {Rayson, Paul},
  title = {From key words to key semantic domains},
  journal = {International Journal of Corpus Linguistics},
  volume = 13,
  number = 4,
  pages = {519--549},
  year = 2008,
  doi = {10.1075/ijcl.13.4.06ray},
  url = {http://www.ingentaconnect.com/content/jbp/ijcl/2008/00000013/00000004/art00005},
  abstract = {This paper reports the extension of the key words method for the comparison of corpora. Using automatic tagging software that assigns part-of-speech and semantic field (domain) tags, a method is described which permits the extraction of key domains by applying the keyness calculation to tag frequency lists. The combination of the key words and key domains methods is shown to allow macroscopic analysis (the study of the characteristics of whole texts or varieties of language) to inform the microscopic level (focussing on the use of a particular linguistic feature) and thereby suggesting those linguistic features which should be investigated further. The resulting 'data-driven' approach presented here combines elements of both the 'corpus-based' and 'corpus-driven' paradigms in corpus linguistics. A web-based tool, Wmatrix, implementing the proposed method is applied in a case study: the comparison of UK 2001 general election manifestos of the Labour and Liberal Democratic parties.},
  groups = {public},
  interhash = {dff324bd5ca64c55a2e491e439a7b5c8},
  intrahash = {753a948e9239f56f7d29b1d24bebb2a9},
  journalpub = {1},
  username = {dbenz}
}

@inproceedings{benz2010semantics,
  author = {Benz, Dominik and Hotho, Andreas and St{\"u}tzer, Stefan and Stumme, Gerd},
  title = {Semantics made by you and me: Self-emerging ontologies can capture the diversity of shared knowledge},
  booktitle = {Proceedings of the 2nd Web Science Conference (WebSci10)},
  address = {Raleigh, NC, USA},
  year = 2010,
  url = {http://www.kde.cs.uni-kassel.de/pub/pdf/benz2010semantics.pdf},
  file = {benz2010semantics.pdf:benz2010semantics.pdf:PDF},
  interhash = {d4a2f14bb27ce220ba43f651e42aeddc},
  intrahash = {16c77e486fb8bc527eb7734b153932ab}
}

@article{bullinaria2007extracting,
  author = {Bullinaria, J. A. and Levy, J. P.},
  title = {Extracting semantic representations from word co-occurrence statistics: A computational study},
  journal = {Behavior Research Methods},
  number = 3,
  pages = 510,
  publisher = {Psychonomic Society Publications},
  year = 2007,
  interhash = {913a28789f70f22c7f7b927ffe936116},
  intrahash = {9697b015d4d27fabe5f7cf1847c45157}
}

% A dissertation, so it is typed as a thesis: the institution moves from the
% journal field to school, and the "Unpublished doctoral dissertation" label
% is kept through the type field.
@phdthesis{evert2004statistics,
  author = {Evert, S.},
  title = {The statistics of word cooccurrences: word pairs and collocations},
  school = {Institut f{\"u}r maschinelle Sprachverarbeitung, Universit{\"a}t Stuttgart},
  type = {Unpublished doctoral dissertation},
  year = 2004,
  url = {http://scholar.google.de/scholar.bib?q=info:6tx-Vnyw1ooJ:scholar.google.com/&output=citation&hl=de&as_sdt=2000&ct=citation&cd=0},
  interhash = {1e2f18c86df238b8759858ec68911893},
  intrahash = {660ddcd65df845d9d3224430c96d2b7b}
}

@article{lund1996producing,
  author = {Lund, K. and Burgess, C.},
  title = {Producing high-dimensional semantic spaces from lexical co-occurrence},
  journal = {Behavior Research Methods Instruments and Computers},
  volume = 28,
  number = 2,
  pages = {203--208},
  publisher = {Austin, Tex.: The Society, c1984-c2004.},
  year = 1996,
  url = {http://scholar.google.de/scholar.bib?q=info:BfG544ylGnkJ:scholar.google.com/&output=citation&hl=de&as_sdt=2000&as_vis=1&ct=citation&cd=0},
  interhash = {77e182745556c0a24e0e67a22652a66e},
  intrahash = {70b626b5e3a8225516679678fb26a830}
}

% NOTE(review): the crossref target conf/sigir/2008 is not defined in this part
% of the file; confirm that it exists and, for classic BibTeX, that it is
% placed after this entry.
@inproceedings{hu2008enhancing,
  author = {Hu, Jian and Fang, Lujun and Cao, Yang and Zeng, Hua-Jun and Li, Hua and Yang, Qiang and Chen, Zheng},
  title = {Enhancing text clustering by leveraging {Wikipedia} semantics},
  booktitle = {SIGIR},
  editor = {Myaeng, Sung-Hyon and Oard, Douglas W. and Sebastiani, Fabrizio and Chua, Tat-Seng and Leong, Mun-Kew},
  pages = {179--186},
  publisher = {ACM},
  year = 2008,
  isbn = {978-1-60558-164-4},
  doi = {10.1145/1390334.1390367},
  ee = {http://doi.acm.org/10.1145/1390334.1390367},
  url = {http://dblp.uni-trier.de/db/conf/sigir/sigir2008.html#HuFCZLYC08},
  crossref = {conf/sigir/2008},
  interhash = {0a2878165034dcdfacb9045608ec482a},
  intrahash = {76f863a12c0b983ec67682deaec1ada4}
}

@article{carroll2005named,
  author = {Carroll, Jeremy J. and Bizer, Christian and Hayes, Pat and Stickler, Patrick},
  title = {Named Graphs},
  journal = {Journal of Web Semantics},
  volume = 3,
  number = 4,
  pages = {247--267},
  year = 2005,
  url = {http://citeseerx.ist.psu.edu/viewdoc/download;jsessionid=3CB749B8395B0323CC948BB579941343?doi=10.1.1.99.7659&rep=rep1&type=pdf},
  abstract = {The Semantic Web consists of many RDF graphs nameable by URIs. This paper extends the syntax and semantics of RDF to cover such named graphs. This enables RDF statements that describe graphs, which is beneficial in many Semantic Web application areas. Named graphs are given an abstract syntax, a formal semantics, an XML syntax, and a syntax based on N3. SPARQL is a query language applicable to named graphs. A specific application area discussed in detail is that of describing provenance information. This paper provides a formally defined framework suited to being a foundation for the Semantic Web trust layer.},
  interhash = {fbe2169cf24955acfa2c6462eee5f442},
  intrahash = {4a9b2ec8532c73db270604ddfc2550cf}
}

% The title keeps its outer brace pair on purpose: it protects the German noun
% capitalisation from styles that lower-case titles.
@book{helbig2008wissensverarbeitung,
  author = {Helbig, Hermann},
  title = {{Wissensverarbeitung und die Semantik der nat{\"u}rlichen Sprache: Wissensrepr{\"a}sentation mit MultiNet}},
  edition = {2.},
  publisher = {Springer},
  address = {Berlin},
  year = 2008,
  isbn = {3-540-76276-0},
  doi = {10.1007/978-3-540-76278-2},
  abstract = {Das Buch gibt eine umfassende Darstellung einer Methodik zur Interpretation und Bedeutungsrepr{\"a}sentation nat{\"u}rlichsprachlicher Ausdr{\"u}cke. Diese Methodik der Mehrschichtigen Erweiterten Semantischen Netze (MultiNet) ist sowohl f{\"u}r theoretische Untersuchungen als auch f{\"u}r die automatische Verarbeitung nat{\"u}rlicher Sprache auf dem Rechner geeignet. Die vorgestellten Ergebnisse sind eingebettet in ein System von Software-Werkzeugen, die eine praktische Nutzung der MultiNet-Darstellungsmittel als Formalismus zur Bedeutungsrepr{\"a}sentation sichern. Hierzu geh{\"o}ren: eine Werkbank f{\"u}r den Wissensingenieur, ein {\"U}bersetzungssystem zur automatischen Gewinnung von Bedeutungsdarstellungen nat{\"u}rlichsprachlicher S{\"a}tze und eine Werkbank f{\"u}r den Computerlexikographen.},
  file = {Amazon Search inside:http\://www.amazon.de/gp/reader/3540762760/:URL},
  interhash = {f7c09fb5257be21200f6a9622c5d301c},
  intrahash = {6eff05ef4aa01e934aa45df7a7ad3154}
}

@article{cattuto2007networkb,
  author = {Cattuto, Ciro and Schmitz, Christoph and Baldassarri, Andrea and Servedio, Vito D. P. and Loreto, Vittorio and Hotho, Andreas and Grahl, Miranda and Stumme, Gerd},
  title = {Network Properties of Folksonomies},
  journal = {AI Communications Journal, Special Issue on ``Network Analysis in Natural Sciences and Engineering''},
  editor = {Hoche, Susanne and N{\"u}rnberger, Andreas and Flach, J{\"u}rgen},
  volume = 20,
  number = 4,
  pages = {245--262},
  publisher = {IOS Press},
  year = 2007,
  issn = {0921-7126},
  url = {http://www.kde.cs.uni-kassel.de/stumme/papers/2007/cattuto2007network.pdf},
  vgwort = {67},
  interhash = {fc5f2df61d28bc99b7e15029da125588},
  intrahash = {da6c676c5664017247c7564fc247b190}
}