% Conventions used in this section of the bibliography:
%   - one field per line, brace-delimited values, `--` for page ranges,
%     bare DOIs (no resolver or "doi:" prefix);
%   - accented characters are written as LaTeX escapes so the entries also
%     work with classic 8-bit BibTeX;
%   - interhash, intrahash, acmid, numpages, ee, username, groups, file,
%     timestamp etc. are non-standard bookkeeping fields. Standard styles
%     ignore them; they are preserved unchanged.

% arXiv preprint: the identifier is stored in eprint/archiveprefix rather
% than in a free-text note.
@misc{turney2010frequency,
  author        = {Turney, Peter D. and Pantel, Patrick},
  title         = {From Frequency to Meaning: Vector Space Models of Semantics},
  year          = {2010},
  eprint        = {1003.1141},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1003.1141},
  abstract      = {Computers understand very little of the meaning of human language. This profoundly limits our ability to give instructions to computers, the ability of computers to explain their actions to us, and the ability of computers to analyse and process text. Vector space models (VSMs) of semantics are beginning to address these limits. This paper surveys the use of VSMs for semantic processing of text. We organize the literature on VSMs according to the structure of the matrix in a VSM. There are currently three broad classes of VSMs, based on term-document, word-context, and pair-pattern matrices, yielding three classes of applications. We survey a broad range of applications in these three categories and we take a detailed look at a specific open source project in each category. Our goal in this survey is to show the breadth of applications of VSMs for semantics, to provide a new perspective on VSMs for those who are already familiar with the area, and to provide pointers into the literature for those who are less familiar with the field.},
  interhash     = {397ead0766aba687b471395729a263d1},
  intrahash     = {917bb6b225b8c844b1a15b6577b3845b}
}

@inproceedings{trabelsi2010bridging,
  author    = {Trabelsi, Chiraz and Jrad, Aicha Ben and Yahia, Sadok Ben},
  title     = {Bridging Folksonomies and Domain Ontologies: Getting Out Non-taxonomic Relations},
  booktitle = {Proceedings of the 2010 IEEE International Conference on Data Mining Workshops},
  series    = {ICDMW '10},
  pages     = {369--379},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  year      = {2010},
  doi       = {10.1109/ICDMW.2010.72},
  isbn      = {978-0-7695-4257-7},
  url       = {http://dx.doi.org/10.1109/ICDMW.2010.72},
  abstract  = {Social book marking tools are rapidly emerging on the Web as it can be witnessed by the overwhelming number of participants. In such spaces, users annotate resources by means of any keyword or tag that they find relevant, giving raise to lightweight conceptual structures \emph{aka} folksonomies. In this respect, needless to mention that ontologies can be of benefit for enhancing information retrieval metrics. In this paper, we introduce a novel approach for ontology learning from a \textit{folksonomy}, which provide shared vocabularies and semantic relations between tags. The main thrust of the introduced approach stands in putting the focus on the discovery of \textit{non-taxonomic} relationships. The latter are often neglected, even though they are of paramount importance from a semantic point of view. The discovery process heavily relies on triadic concepts to discover and select related tags and to extract and label non-taxonomically relationships between related tags and external sources for tags filtering and non-taxonomic relationships extraction. In addition, we also discuss a new approach to evaluate obtained relations in an automatic way against WordNet repository and presents promising results for a real world \textit{folksonomy}.},
  acmid     = {1934438},
  numpages  = {11},
  interhash = {4f2f573b32d29f76b348ee18d49c9ec4},
  intrahash = {26c469e5c064f050f35e4448d0224886}
}

@inproceedings{suchanek2008social,
  author    = {Suchanek, Fabian M. and Vojnovic, Milan and Gunawardena, Dinan},
  title     = {Social tags: meaning and suggestions},
  booktitle = {Proceeding of the 17th ACM conference on Information and knowledge management},
  series    = {CIKM '08},
  pages     = {223--232},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Napa Valley, California, USA},
  year      = {2008},
  doi       = {10.1145/1458082.1458114},
  isbn      = {978-1-59593-991-3},
  url       = {http://doi.acm.org/10.1145/1458082.1458114},
  abstract  = {This paper aims to quantify two common assumptions about social tagging: (1) that tags are "meaningful" and (2) that the tagging process is influenced by tag suggestions. For (1), we analyze the semantic properties of tags and the relationship between the tags and the content of the tagged page. Our analysis is based on a corpus of search keywords, contents, titles, and tags applied to several thousand popular Web pages. Among other results, we find that the more popular tags of a page tend to be the more meaningful ones. For (2), we develop a model of how the influence of tag suggestions can be measured. From a user study with over 4,000 participants, we conclude that roughly one third of the tag applications may be induced by the suggestions. Our results would be of interest for designers of social tagging systems and are a step towards understanding how to best leverage social tags for applications such as search and information extraction.},
  acmid     = {1458114},
  numpages  = {10},
  interhash = {1bca5a66a6a562258e0c0357545fed34},
  intrahash = {ff31cf8541004adc7cd712ed715706b3}
}

@inproceedings{kipp2006exploring,
  author               = {Kipp, Margaret E. I.},
  title                = {Exploring the context of user, creator and intermediate tagging},
  booktitle            = {ASIS\&T 2006 Information Architecture Summit},
  month                = mar,
  year                 = {2006},
  url                  = {http://iasummit.org/2006/conferencedescrip.htm\#109},
  abstract             = {This paper examines the results of a study of the three groups involved in creating index keywords or tags: users, authors and intermediaries. Keywords from each of the three groups were compared to determine similarities and differences in term use. Comparisons suggested that there were important differences in the contexts of the three groups that should be taken into account when assigning keywords or designing systems for the organisation of information.},
  citeulike-article-id = {581353},
  citeulike-linkout-0  = {http://iasummit.org/2006/conferencedescrip.htm\#109},
  posted-at            = {2006-04-11 03:32:13},
  priority             = {3},
  interhash            = {cc95302ec99e70ffae810ee377ae98e6},
  intrahash            = {904d826cdf2349f8b6ec802eddd6d0c4}
}

% NOTE(review): the crossref parent conf/kcap/2011 is not defined in this
% section; classic BibTeX needs it to appear later in the file. The doi is
% taken from the existing ee link.
@inproceedings{conf/kcap/EcharteACVL11,
  author    = {Echarte, Francisco and Astrain, Jos{\'e} Javier and C{\'o}rdoba, Alberto and Villadangos, Jes{\'u}s E. and Labat, Aritz},
  title     = {A self-adapting method for knowledge management in collaborative and social tagging systems},
  booktitle = {K-CAP},
  editor    = {Musen, Mark A. and Corcho, {\'O}scar},
  pages     = {175--176},
  publisher = {ACM},
  year      = {2011},
  doi       = {10.1145/1999676.1999712},
  isbn      = {978-1-4503-0396-8},
  url       = {http://dblp.uni-trier.de/db/conf/kcap/kcap2011.html#EcharteACVL11},
  ee        = {http://doi.acm.org/10.1145/1999676.1999712},
  crossref  = {conf/kcap/2011},
  interhash = {e23258881917fe03d4acb056df50c4da},
  intrahash = {7b4d79039426f7dea2559bde5d6b9e92}
}

% NOTE(review): the crossref parent conf/criwg/2009 is not defined in this
% section. The doi is taken from the existing ee link.
@inproceedings{costa2009social,
  author    = {Costa, Ricardo Araujo and Silva, Edeilson M. and Neto, Mario G. and Delgado, Diego B. and Ribeiro, Rafael A. and Meira, Silvio R. L.},
  title     = {Social Knowledge Management in Practice: A Case Study},
  booktitle = {CRIWG},
  editor    = {Carri{\c{c}}o, Lu{\'\i}s and Baloian, Nelson and Fonseca, Benjamim},
  series    = {Lecture Notes in Computer Science},
  volume    = {5784},
  pages     = {94--109},
  publisher = {Springer},
  year      = {2009},
  doi       = {10.1007/978-3-642-04216-4_8},
  isbn      = {978-3-642-04215-7},
  url       = {http://dblp.uni-trier.de/db/conf/criwg/criwg2009.html#CostaSNDRM09},
  ee        = {http://dx.doi.org/10.1007/978-3-642-04216-4_8},
  crossref  = {conf/criwg/2009},
  interhash = {3ef53699a4c17a0b3362ccf0bf3df0c0},
  intrahash = {f37184a2bf0e5a1b3092a2e7a5870ba7}
}

@inproceedings{afsharchi2006automated,
  author    = {Afsharchi, Mohsen and Far, Behrouz H.},
  title     = {Automated ontology evolution in a multi-agent system},
  booktitle = {Proceedings of the 1st international conference on Scalable information systems},
  series    = {InfoScale '06},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Hong Kong},
  year      = {2006},
  doi       = {10.1145/1146847.1146863},
  isbn      = {1-59593-428-6},
  url       = {http://doi.acm.org/10.1145/1146847.1146863},
  abstract  = {This research addresses the formation of new concepts and their corresponding ontology in a multi-agent system where individual autonomous agents try to learn new concepts by consulting several other agents. In this research individual agents create and learn their distinct conceptualization and rather than a commitment to a common ontology they use their own ontologies. In this paper multi-agent supervised learning of concepts among individual agents with diverse conceptualization and different ontologies is introduced and demonstrated through an intuitive example in which supervisors are other agents rather than a human.},
  acmid     = {1146863},
  articleno = {16},
  interhash = {3614f61a4bddc48c0eeb7eecf6e7adee},
  intrahash = {b5528a701397b534b3b0e5a24e37e7e2}
}

% The second author's name was stored with mis-encoded characters; the
% accents are restored as LaTeX escapes. The doi is taken from the url.
@inproceedings{barla2009deriving,
  author    = {Barla, Michal and Bielikov{\'a}, M{\'a}ria},
  title     = {On Deriving Tagsonomies: Keyword Relations Coming from Crowd},
  booktitle = {Computational Collective Intelligence. Semantic Web, Social Networks and Multiagent System},
  editor    = {Nguyen, Ngoc Thanh and Kowalczyk, Ryszard and Chen, Shyi-Ming},
  series    = {Lecture Notes in Computer Science},
  volume    = {5796},
  pages     = {309--320},
  publisher = {Springer},
  year      = {2009},
  doi       = {10.1007/978-3-642-04441-0_27},
  isbn      = {978-3-642-04440-3},
  url       = {http://dx.doi.org/10.1007/978-3-642-04441-0_27},
  interhash = {ff65905d1c79503920fa46c013c2861c},
  intrahash = {98c5b4c0cdbc9344773f9867f90a6a3a}
}

% The DOI was previously stored in the note field.
@incollection{radelaar2011improving,
  author      = {Radelaar, Joni and Boor, Aart-Jan and Vandic, Damir and van Dam, Jan-Willem and Hogenboom, Frederik and Frasincar, Flavius},
  title       = {Improving the Exploration of Tag Spaces Using Automated Tag Clustering},
  booktitle   = {Web Engineering},
  editor      = {Auer, S{\"o}ren and D{\'\i}az, Oscar and Papadopoulos, George},
  series      = {Lecture Notes in Computer Science},
  volume      = {6757},
  pages       = {274--288},
  publisher   = {Springer Berlin / Heidelberg},
  year        = {2011},
  doi         = {10.1007/978-3-642-22233-7_19},
  url         = {http://dx.doi.org/10.1007/978-3-642-22233-7_19},
  affiliation = {Erasmus University Rotterdam, PO Box 1738, NL-3000 Rotterdam, The Netherlands},
  interhash   = {48fe306f42bc405a5f8ae0f4a8885f3a},
  intrahash   = {77bc7f7e46481b47c11dd9e53d5741e0}
}

@inproceedings{venetis2011selection,
  author    = {Venetis, Petros and Koutrika, Georgia and Garcia-Molina, Hector},
  title     = {On the selection of tags for tag clouds},
  booktitle = {Proceedings of the fourth ACM international conference on Web search and data mining},
  series    = {WSDM '11},
  pages     = {835--844},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Hong Kong, China},
  year      = {2011},
  doi       = {10.1145/1935826.1935855},
  isbn      = {978-1-4503-0493-1},
  url       = {http://doi.acm.org/10.1145/1935826.1935855},
  abstract  = {We examine the creation of a tag cloud for exploring and understanding a set of objects (e.g., web pages, documents). In the first part of our work, we present a formal system model for reasoning about tag clouds. We then present metrics that capture the structural properties of a tag cloud, and we briefly present a set of tag selection algorithms that are used in current sites (e.g., del.icio.us, Flickr, Technorati) or that have been described in recent work. In order to evaluate the results of these algorithms, we devise a novel synthetic user model. This user model is specifically tailored for tag cloud evaluation and assumes an "ideal" user. We evaluate the algorithms under this user model, as well as the model itself, using two datasets: CourseRank (a Stanford social tool containing information about courses) and del.icio.us (a social bookmarking site). The results yield insights as to when and why certain selection schemes work best.},
  acmid     = {1935855},
  numpages  = {10},
  interhash = {fc7ea4080c46677eeda3a69b67e89d77},
  intrahash = {c3ccbbcd57d5c65a03f6f4e8b1eccd02}
}

@incollection{giunchiglia2004smatch,
  author      = {Giunchiglia, Fausto and Shvaiko, Pavel and Yatskevich, Mikalai},
  title       = {{S-Match}: an Algorithm and an Implementation of Semantic Matching},
  booktitle   = {The Semantic Web: Research and Applications},
  editor      = {Bussler, Christoph and Davies, John and Fensel, Dieter and Studer, Rudi},
  series      = {Lecture Notes in Computer Science},
  volume      = {3053},
  pages       = {61--75},
  publisher   = {Springer},
  address     = {Berlin / Heidelberg},
  year        = {2004},
  doi         = {10.1007/978-3-540-25956-5_5},
  url         = {http://dx.doi.org/10.1007/978-3-540-25956-5_5},
  abstract    = {We think of Match as an operator which takes two graph-like structures (e.g., conceptual hierarchies or ontologies) and produces a mapping between those nodes of the two graphs that correspond semantically to each other. Semantic matching is a novel approach where semantic correspondences are discovered by computing, and returning as a result, the semantic information implicitly or explicitly codified in the labels of nodes and arcs. In this paper we present an algorithm implementing semantic matching, and we discuss its implementation within the S-Match system. We also test S-Match against three state of the art matching systems. The results, though preliminary, look promising, in particular for what concerns precision and recall.},
  affiliation = {Dept. of Information and Communication Technology, University of Trento, 38050 Povo, Trento, Italy},
  interhash   = {6b38aa96f0a9200c919a96a5873c9e3c},
  intrahash   = {c7d77f86497e94b352b6f3279fa70deb}
}

@incollection{cregan2007symbol,
  author      = {Cregan, Anne},
  title       = {Symbol Grounding for the {Semantic Web}},
  booktitle   = {The Semantic Web: Research and Applications},
  editor      = {Franconi, Enrico and Kifer, Michael and May, Wolfgang},
  series      = {Lecture Notes in Computer Science},
  volume      = {4519},
  pages       = {429--442},
  publisher   = {Springer},
  address     = {Berlin / Heidelberg},
  year        = {2007},
  doi         = {10.1007/978-3-540-72667-8_31},
  url         = {http://dx.doi.org/10.1007/978-3-540-72667-8_31},
  abstract    = {A true semantic web of data requires dynamic, real-time interoperability between disparate data sources, developed by different organizations in different ways, each for their own specific purposes. Ontology languages provide a means to relate data items to each other in logically well-defined ways, producing complex logical structures with an underlying formal semantics. Whilst these structures have a logical formal semantics, they lack a pragmatic semantics linking them in a systematic and unambiguous way to the real world entities they represent. Thus they are intricate "castles in the air", which may certainly have pathways built to link them together, but lack the solid foundations required for robust real-time dynamic interoperability between structures not mapped to each other in the design stage. Current ontology interoperability strategies lack such a meaning-based arbitrator, and depend instead on human mediation or heuristic approaches. This paper introduces the symbol grounding problem, explains its relevance for the Semantic Web, illustrates how inappropriate correspondence between symbol and referent can result in logically valid but meaningless inferences, examines some of the shortcomings of the current approach in dealing effectively at the level of meaning, and concludes with some ideas for identifying effective grounding strategies.},
  affiliation = {National ICT Australia (NICTA)},
  interhash   = {e10ab93015619a6b0dc3b01cc5800a03},
  intrahash   = {d549e1d5c549384fdb765ac257e0fe0d}
}

% The workshop name was stored in journal; for an inproceedings entry the
% venue belongs in booktitle.
@inproceedings{bullinaria2008semantic,
  author    = {Bullinaria, J. A.},
  title     = {Semantic Categorization Using Simple Word Co-occurrence Statistics},
  booktitle = {ESSLLI Workshop on Distributional Lexical Semantics},
  year      = {2008},
  file      = {bullinaria2008semantic.pdf:bullinaria2008semantic.pdf:PDF},
  groups    = {public},
  timestamp = {2011-01-28 09:53:43},
  username  = {dbenz},
  interhash = {cdb7b1ff0e89f61f84e2c15a0e46c221},
  intrahash = {efae206c0f89363a3273a8d57c87eff5}
}

% NOTE(review): the url is a search-engine citation export link rather than
% a link to the paper, and the page range looks suspicious; both are kept
% as found and should be checked against the proceedings.
@inproceedings{gabrilovich2007computing,
  author    = {Gabrilovich, E. and Markovitch, S.},
  title     = {Computing semantic relatedness using {Wikipedia}-based explicit semantic analysis},
  booktitle = {Proceedings of the 20th International Joint Conference on Artificial Intelligence},
  pages     = {6--12},
  year      = {2007},
  url       = {http://scholar.google.de/scholar.bib?q=info:woCrRNTAsA4J:scholar.google.com/&output=citation&hl=de&as_sdt=2000&ct=citation&cd=3},
  abstract  = {Computing semantic relatedness of natural language texts requires access to vast amounts of common-sense and domain-specific world knowledge. We propose Explicit Semantic Analysis (ESA), a novel method that represents the meaning of texts in a high-dimensional space of concepts derived from Wikipedia. We use machine learning techniques to explicitly represent the meaning of any text as a weighted vector of Wikipedia-based concepts. Assessing the relatedness of texts in this space amounts to comparing the corresponding vectors using conventional metrics (e.g., cosine). Compared with the previous state of the art, using ESA results in substantial improvements in correlation of computed relatedness scores with human judgments: from r = 0.56 to 0.75 for individual words and from r = 0.60 to 0.72 for texts. Importantly, due to the use of natural concepts, the ESA model is easy to explain to human users.},
  file      = {gabrilovich2007computing.pdf:gabrilovich2007computing.pdf:PDF},
  groups    = {public},
  timestamp = {2010-08-16 14:11:53},
  username  = {dbenz},
  interhash = {5baf6af4bf58cf3926b39a12edb35e58},
  intrahash = {839a06f838f02c04a8569fd41a5da284}
}

% NOTE(review): booktitle was missing. It is filled in from cregan2007symbol,
% which carries the same LNCS volume (4519) and year -- confirm against the
% publisher record.
@inproceedings{specia2007integrating,
  author    = {Specia, Lucia and Motta, Enrico},
  title     = {Integrating Folksonomies with the {Semantic Web}},
  booktitle = {The Semantic Web: Research and Applications},
  series    = {Lecture Notes in Computer Science},
  volume    = {4519},
  pages     = {624--639},
  publisher = {Springer Berlin / Heidelberg},
  year      = {2007},
  abstract  = {While tags in collaborative tagging systems serve primarily an indexing purpose, facilitating search and navigation of resources, the use of the same tags by more than one individual can yield a collective classification schema. We present an approach for making explicit the semantics behind the tag space in social tagging systems, so that this collaborative organization can emerge in the form of groups of concepts and partial ontologies. This is achieved by using a combination of shallow pre-processing strategies and statistical techniques together with knowledge provided by ontologies available on the semantic web. Preliminary results on the del.icio.us and Flickr tag sets show that the approach is very promising: it generates clusters with highly related tags corresponding to concepts in ontologies and meaningful relationships among subsets of these tags can be identified.},
  file      = {specia2007integrating.pdf:specia2007integrating.pdf:PDF},
  groups    = {public},
  timestamp = {2007-09-29 15:16:09},
  username  = {dbenz},
  interhash = {b828fbd5c9ddc4f9551f973445ecb283},
  intrahash = {8800fc1a639aeb43fd55598d2410e2e1}
}

@article{Hazman:30May2009:1744-2621:24,
  author    = {Hazman, Maryam and El-Beltagy, Samhaa R. and Rafea, Ahmed},
  title     = {Ontology learning from domain specific web documents},
  journal   = {International Journal of Metadata, Semantics and Ontologies},
  volume    = {4},
  pages     = {24--33},
  year      = {2009},
  doi       = {10.1504/IJMSO.2009.026251},
  url       = {http://www.ingentaconnect.com/content/ind/ijmso/2009/00000004/F0020001/art00003},
  abstract  = {Ontologies play a vital role in many web- and internet-related applications. This work presents a system for accelerating the ontology building process via semi-automatically learning a hierarchal ontology given a set of domain-specific web documents and a set of seed concepts. The methods are tested with web documents in the domain of agriculture. The ontology is constructed through the use of two complementary approaches. The presented system has been used to build an ontology in the agricultural domain using a set of Arabic extension documents and evaluated against a modified version of the AGROVOC ontology.},
  interhash = {fe27d687bcba91a7a6fe51eec9a2b87d},
  intrahash = {323c8bdedc8a4643232a498ac03d6407}
}

% NOTE(review): the crossref parent CEUR-WS.org/Vol-405 is not defined in
% this section.
@inproceedings{tesconi2008semantify,
  author    = {Tesconi, Maurizio and Ronzano, Francesco and Marchetti, Andrea and Minutoli, Salvatore},
  title     = {Semantify del.icio.us: Automatically Turn your Tags into Senses},
  booktitle = {Proceedings of the Workshop Social Data on the Web (SDoW2008)},
  year      = {2008},
  url       = {http://CEUR-WS.org/Vol-405/paper8.pdf},
  crossref  = {CEUR-WS.org/Vol-405},
  abstract  = {At present tagging is experimenting a great diffusion as the most adopted way to collaboratively classify resources over the Web. In this paper, after a detailed analysis of the attempts made to improve the organization and structure of tagging systems as well as the usefulness of this kind of social data, we propose and evaluate the Tag Disambiguation Algorithm, mining del.icio.us data. It allows to easily semantify the tags of the users of a tagging service: it automatically finds out for each tag the related concept of Wikipedia in order to describe Web resources through senses. On the basis of a set of evaluation tests, we analyze all the advantages of our sense-based way of tagging, proposing new methods to keep the set of users tags more consistent or to classify the tagged resources on the basis of Wikipedia categories, YAGO classes or Wordnet synsets. We discuss also how our semanitified social tagging data are strongly linked to DBPedia and the datasets of the Linked Data community.},
  file      = {tesconi2008semantify.pdf:tesconi2008semantify.pdf:PDF},
  groups    = {public},
  timestamp = {2009-09-27 15:57:13},
  username  = {dbenz},
  interhash = {0c1c96b41a0af8512c20a7d41504640f},
  intrahash = {dd698b5ee4d93496d11627cbe1615514}
}

% The first author's surname is the two-word "De Meo" (as the citation key
% indicates), so it is written in "Last, First" form to avoid a wrong split.
@article{DeMeo2009511,
  author    = {De Meo, Pasquale and Quattrone, Giovanni and Ursino, Domenico},
  title     = {Exploitation of semantic relationships and hierarchical data structures to support a user in his annotation and browsing activities in folksonomies},
  journal   = {Information Systems},
  volume    = {34},
  number    = {6},
  pages     = {511--535},
  year      = {2009},
  doi       = {10.1016/j.is.2009.02.004},
  issn      = {0306-4379},
  url       = {http://www.sciencedirect.com/science/article/B6V0G-4VPD6TH-2/2/1fa5352ba722fee6afa4dff235c3d898},
  abstract  = {In this paper we present a new approach to supporting users to annotate and browse resources referred by a folksonomy. Our approach is characterized by the following novelties: (i) it proposes a probabilistic technique to quickly and accurately determine the similarity and the generalization degrees of two tags; (ii) it proposes two hierarchical structures and two related algorithms to arrange groups of semantically related tags in a hierarchy; this allows users to visualize tags of their interests according to desired semantic granularities and, then, helps them to find those tags best expressing their information needs. In this paper we first illustrate the technical characteristics of our approach; then we describe various experiments allowing its performance to be tested; finally, we compare it with other related approaches already proposed in the literature.},
  interhash = {106972d128b1ec0f9d66e2edf1590d0d},
  intrahash = {0f982c079c7975ac25b724029b1b6fb9}
}

% NOTE(review): only a single page number is recorded and no volume is
% given; complete from the journal record if available.
@article{bullinaria2007extracting,
  author    = {Bullinaria, J. A. and Levy, J. P.},
  title     = {Extracting semantic representations from word co-occurrence statistics: A computational study},
  journal   = {Behavior Research Methods},
  number    = {3},
  pages     = {510},
  publisher = {Psychonomic Society Publications},
  year      = {2007},
  interhash = {913a28789f70f22c7f7b927ffe936116},
  intrahash = {9697b015d4d27fabe5f7cf1847c45157}
}

% The chapter title was stored in the chapter field (which is meant for a
% chapter number) and the book title in title; they now sit in title and
% booktitle, as an incollection entry requires.
@incollection{fayyad1996advances,
  author    = {Fayyad, Usama M. and Piatetsky-Shapiro, Gregory and Smyth, Padhraic},
  title     = {From Data Mining to Knowledge Discovery: An Overview},
  booktitle = {Advances in Knowledge Discovery and Data Mining},
  editor    = {Fayyad, Usama M. and Piatetsky-Shapiro, Gregory and Smyth, Padhraic and Uthurusamy, Ramasamy},
  pages     = {1--34},
  publisher = {American Association for Artificial Intelligence},
  address   = {Menlo Park, CA, USA},
  year      = {1996},
  isbn      = {0-262-56097-6},
  url       = {http://portal.acm.org/citation.cfm?id=257938.257942},
  acmid     = {257942},
  numpages  = {34},
  interhash = {e62d85a492bbc917f43a5d9c8b775189},
  intrahash = {d0b54b224b992e51d892d0f06d45cf6b}
}