@comment{
  Bibliography database: web search, tagging/folksonomy and crowdsourcing
  papers, plus a few cartography and lattice-theory references.

  Conventions used in this file:
    - one field per line, fields in alphabetical order;
    - names in "Last, First" form, joined with " and ";
    - page ranges with a double hyphen;
    - month given as the unquoted standard three-letter macro;
    - doi holds the bare DOI (no resolver prefix);
    - proper nouns and acronyms in titles are brace-protected.

  Fields such as interhash, intrahash, username, timestamp, groups and file
  look like reference-manager export metadata; standard styles ignore
  unknown fields, so they are kept untouched.
}

@comment{NOTE(review): the next entry is typed as an article but carries no
  journal field; possibly a conference paper. Confirm the venue and add
  journal or booktitle as appropriate.}
@article{gyongyi2004combating,
  author = {Gyöngyi, Zoltán and Garcia-Molina, Hector and Pedersen, Jan},
  interhash = {428bfe5366151d07a234864481364e60},
  intrahash = {cb480eab1c421d316fb8cb83f9688af3},
  pages = {576--587},
  title = {Combating web spam with {TrustRank}},
  url = {http://citeseer.uark.edu:8080/citeseerx/viewdoc/summary?doi=10.1.1.122.8129},
  year = 2004
}

@article{10.1109/TKDE.2012.115,
  address = {Los Alamitos, CA, USA},
  author = {Zubiaga, Arkaitz and Fresno, Victor and Martinez, Raquel and Garcia-Plaza, Alberto P.},
  doi = {10.1109/TKDE.2012.115},
  interhash = {f2e961e2b99fec0634b0d4fa3e001282},
  intrahash = {8a25332bfeb33e2ad8e1e1a062976da2},
  issn = {1041-4347},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  number = {PrePrints},
  publisher = {IEEE Computer Society},
  title = {Harnessing Folksonomies to Produce a Social Classification of Resources},
  volume = 99,
  year = 2012
}

@inproceedings{mendes2011dbpedia,
  abstract = {Interlinking text documents with Linked Open Data enables the Web of Data to be used as background knowledge within document-oriented applications such as search and faceted browsing. As a step towards interconnecting the Web of Documents with the Web of Data, we developed DBpedia Spotlight, a system for automatically annotating text documents with DBpedia URIs. DBpedia Spotlight allows users to configure the annotations to their specific needs through the DBpedia Ontology and quality measures such as prominence, topical pertinence, contextual ambiguity and disambiguation confidence. We compare our approach with the state of the art in disambiguation, and evaluate our results in light of three baselines and six publicly available annotation systems, demonstrating the competitiveness of our system. DBpedia Spotlight is shared as open source and deployed as a Web Service freely available for public use.},
  acmid = {2063519},
  address = {New York, NY, USA},
  author = {Mendes, Pablo N. and Jakob, Max and García-Silva, Andrés and Bizer, Christian},
  booktitle = {Proceedings of the 7th International Conference on Semantic Systems},
  doi = {10.1145/2063518.2063519},
  interhash = {92df08698e5608afc6dc5b3e9be76880},
  intrahash = {58fbb395741cce1d5370a6f205f24843},
  isbn = {978-1-4503-0621-8},
  location = {Graz, Austria},
  numpages = {8},
  pages = {1--8},
  publisher = {ACM},
  title = {{DBpedia Spotlight}: shedding light on the web of documents},
  url = {http://doi.acm.org/10.1145/2063518.2063519},
  year = 2011
}

@misc{lpezczar2012manipulating,
  abstract = {The launch of Google Scholar Citations and Google Scholar Metrics may provoke a revolution in the research evaluation field as it places within every researchers reach tools that allow bibliometric measuring. In order to alert the research community over how easily one can manipulate the data and bibliometric indicators offered by Google's products we present an experiment in which we manipulate the Google Citations profiles of a research group through the creation of false documents that cite their documents, and consequently, the journals in which they have published modifying their H index. For this purpose we created six documents authored by a faked author and we uploaded them to a researcher's personal website under the University of Granadas domain. The result of the experiment meant an increase of 774 citations in 129 papers (six citations per paper) increasing the authors and journals H index. We analyse the malicious effect this type of practices can cause to Google Scholar Citations and Google Scholar Metrics. Finally, we conclude with several deliberations over the effects these malpractices may have and the lack of control tools these tools offer},
  archiveprefix = {arXiv},
  author = {López-Cózar, Emilio Delgado and Robinson-García, Nicolás and Torres-Salinas, Daniel},
  eprint = {1212.0638},
  interhash = {99886e72204cb2154bf6a0481e782bdd},
  intrahash = {18876648c3a45cd3ac5a3527a8fbdf44},
  note = {10 pages, 4 figures},
  title = {Manipulating {Google Scholar Citations} and {Google Scholar Metrics}: simple, easy and tempting},
  url = {http://arxiv.org/abs/1212.0638},
  year = 2012
}

@article{cho2006stanford,
  abstract = {We describe the design and performance of WebBase, a tool for Web research. The system includes a highly customizable crawler, a repository for collected Web pages, an indexer for both text and link-related page features, and a high-speed content distribution facility. The distribution module enables researchers world-wide to retrieve pages from WebBase, and stream them across the Internet at high speed. The advantage for the researchers is that they need not all crawl the Web before beginning their research. WebBase has been used by scores of research and teaching organizations world-wide, mostly for investigations into Web topology and linguistic content analysis. After describing the system's architecture, we explain our engineering decisions for each of the WebBase components, and present respective performance measurements.},
  acmid = {1149124},
  address = {New York, NY, USA},
  author = {Cho, Junghoo and Garcia-Molina, Hector and Haveliwala, Taher and Lam, Wang and Paepcke, Andreas and Raghavan, Sriram and Wesley, Gary},
  doi = {10.1145/1149121.1149124},
  interhash = {bebbc072ea2dccf4c2b27abf244c1f08},
  intrahash = {3cd21bf8a87619e0489b8da177c9f0b4},
  issn = {1533-5399},
  issue_date = {May 2006},
  journal = {ACM Transactions on Internet Technology},
  month = may,
  number = 2,
  numpages = {34},
  pages = {153--186},
  publisher = {ACM},
  title = {{Stanford} {WebBase} components and applications},
  url = {http://doi.acm.org/10.1145/1149121.1149124},
  volume = 6,
  year = 2006
}

@inproceedings{heymann2011turkalytics,
  abstract = {We present "Turkalytics," a novel analytics tool for human computation systems. Turkalytics processes and reports logging events from workers in real-time and has been shown to scale to over one hundred thousand logging events per day. We present a state model for worker interaction that covers the Mechanical Turk (the SCRAP model) and a data model that demonstrates the diversity of data collected by Turkalytics. We show that Turkalytics is effective at data collection, in spite of it being unobtrusive. Lastly, we describe worker locations, browser environments, activity information, and other examples of data collected by our tool.},
  acmid = {1963473},
  address = {New York, NY, USA},
  author = {Heymann, Paul and Garcia-Molina, Hector},
  booktitle = {Proceedings of the 20th international conference on World wide web},
  doi = {10.1145/1963405.1963473},
  interhash = {6d183b7917745ec2ef531e66e18f4bcd},
  intrahash = {9461e2c2c5f0a6304ad6017a56788217},
  isbn = {978-1-4503-0632-4},
  location = {Hyderabad, India},
  numpages = {10},
  pages = {477--486},
  publisher = {ACM},
  title = {Turkalytics: analytics for human computation},
  url = {http://doi.acm.org/10.1145/1963405.1963473},
  year = 2011
}

@techreport{parameswaran2011declarative,
  abstract = {Crowdsourcing enables programmers to incorporate ``human computation'' as a building block in algorithms that cannot be fully automated, such as text analysis and image recognition. Similarly, humans can be used as a building block in data-intensive applications --- providing, comparing, and verifying data used by applications. Building upon the decades-long success of declarative approaches to conventional data management, we use a similar approach for data-intensive applications that incorporate humans. Specifically, declarative queries are posed over stored relational data as well as data computed on-demand from the crowd, and the underlying system orchestrates the computation of query answers. We present Deco, a database system for declarative crowdsourcing. We describe Deco's data model, query language, and our initial prototype. Deco's data model was designed to be general (it can be instantiated to other proposed models), flexible (it allows methods for uncertainty resolution and external access to be plugged in), and principled (it has a precisely-defined semantics). Syntactically, Deco's query language is a simple extension to SQL. Based on Deco's data model, we define a precise semantics for arbitrary queries involving both stored data and data obtained from the crowd. We then describe the Deco query processor, which respects our semantics while coping with the unique combination of latency, monetary cost, and uncertainty introduced in the crowdsourcing environment. Finally, we describe our current system implementation, and we discuss the novel query optimization challenges that form the core of our ongoing work.},
  author = {Parameswaran, Aditya and Park, Hyunjung and Garcia-Molina, Hector and Polyzotis, Neoklis and Widom, Jennifer},
  institution = {Stanford University},
  interhash = {af28066d0b21d87a9ef90f63d7e6095f},
  intrahash = {4de5dd97e5466c9f1fc63c0d23b4d90a},
  number = 1015,
  publisher = {Stanford InfoLab},
  title = {Deco: Declarative Crowdsourcing},
  url = {http://ilpubs.stanford.edu:8090/1015/},
  year = 2011
}

@incollection{alegria1995aspectos,
  address = {Lissabon},
  author = {Alegria, Maria Fernanda and Garcia, João Carlos},
  booktitle = {Os mapas em {Portugal}: da tradição aos rumos da cartografia},
  editor = {Dias, Maria Helena},
  interhash = {36cc6f2d02b6117d576fe6f3bc1e2223},
  intrahash = {595e679b315d6928e2d8cd893bc29687},
  pages = {29--84},
  title = {Aspectos da evolução da {Cartografia} portuguesa (séculos {XV} a {XIX})},
  year = 1995
}

@comment{NOTE(review): the author below was originally split as
  "Garcia" (last) / "Iglesias J. M." (first); it is assumed here that
  "Garcia Iglesias" is a compound surname. Confirm against the source.}
@article{garcia1981beatos,
  author = {Garcia Iglesias, J. M.},
  interhash = {4192601a9479ff4605e1b831c59157cc},
  intrahash = {afac5c25f0295161a3fa29cdcb263a6f},
  journal = {Archivos Leoneses},
  pages = {73--87},
  title = {El mapa de los {Beatos} en la pintura mural romanica de {San Pedro de Rocas} ({Orense})},
  volume = 35,
  year = 1981
}

@comment{NOTE(review): the next entry has the same authors, title, book,
  editor and pages as alegria1995aspectos but a different year. It looks
  like a duplicate with a mistyped year; the key is kept so existing
  citations still resolve. Confirm the year and consider merging.}
@incollection{alegria1952aspectos,
  address = {Lissabon},
  author = {Alegria, Maria Fernanda and Garcia, João Carlos},
  booktitle = {Os mapas em {Portugal}: da tradição aos rumos da cartografia},
  editor = {Dias, Maria Helena},
  interhash = {f32e3d6b5821858666b0ab30cade35db},
  intrahash = {dbdc57b8fead3e917b1d41ae0ef96ed1},
  pages = {29--84},
  title = {Aspectos da evolução da {Cartografia} portuguesa (séculos {XV} a {XIX})},
  year = 1952
}

@inproceedings{venetis2011selection,
  abstract = {We examine the creation of a tag cloud for exploring and understanding a set of objects (e.g., web pages, documents). In the first part of our work, we present a formal system model for reasoning about tag clouds. We then present metrics that capture the structural properties of a tag cloud, and we briefly present a set of tag selection algorithms that are used in current sites (e.g., del.icio.us, Flickr, Technorati) or that have been described in recent work. In order to evaluate the results of these algorithms, we devise a novel synthetic user model. This user model is specifically tailored for tag cloud evaluation and assumes an "ideal" user. We evaluate the algorithms under this user model, as well as the model itself, using two datasets: CourseRank (a Stanford social tool containing information about courses) and del.icio.us (a social bookmarking site). The results yield insights as to when and why certain selection schemes work best.},
  acmid = {1935855},
  address = {New York, NY, USA},
  author = {Venetis, Petros and Koutrika, Georgia and Garcia-Molina, Hector},
  booktitle = {Proceedings of the fourth ACM international conference on Web search and data mining},
  doi = {10.1145/1935826.1935855},
  interhash = {fc7ea4080c46677eeda3a69b67e89d77},
  intrahash = {c3ccbbcd57d5c65a03f6f4e8b1eccd02},
  isbn = {978-1-4503-0493-1},
  location = {Hong Kong, China},
  numpages = {10},
  pages = {835--844},
  publisher = {ACM},
  series = {WSDM '11},
  title = {On the selection of tags for tag clouds},
  url = {http://doi.acm.org/10.1145/1935826.1935855},
  year = 2011
}

@techreport{antonellis2011navigating,
  abstract = {We propose to integrate various pieces of information about a web page (search queries, social annotations, terms extracted from the pagetext) into a navigational menu. This menu displays an auxiliary set of tags (navigational tags) selected with the goal of helping user navigation. We propose a novel framework (navigational utility) for comparing different tag selection methods. We also investigate which source of tags is more suitable for our scenario and we conclude that tags extracted from search queries (query tags) are more appropriate.},
  author = {Antonellis, Ioannis and Karim, Jawed and Garcia-Molina, Hector},
  institution = {Stanford University},
  interhash = {bed616d711587f01598c6ea645e36a33},
  intrahash = {defb921ef0fc05065943e008cce6dd0e},
  publisher = {Stanford InfoLab},
  title = {Navigating the Web with Query Tags},
  type = {Technical Report},
  url = {http://ilpubs.stanford.edu:8090/943/},
  year = 2011
}

@inproceedings{garcia2009preliminary,
  abstract = {The availability of tag-based user-generated content for a variety of Web resources (music, photos, videos, text, etc.) has largely increased in the last years. Users can assign tags freely and then use them to share and retrieve information. However, tag-based sharing and retrieval is not optimal due to the fact that tags are plain text labels without an explicit or formal meaning, and hence polysemy and synonymy should be dealt with appropriately. To ameliorate these problems, we propose a context-based tag disambiguation algorithm that selects the meaning of a tag among a set of candidate DBpedia entries, using a common information retrieval similarity measure. The most similar DBpedia entry is selected as the one representing the meaning of the tag. We describe and analyze some preliminary results, and discuss about current challenges in this area.},
  author = {Garcia, Andres and Szomszor, Martin and Alani, Harith and Corcho, Oscar},
  booktitle = {Knowledge Capture (K-Cap'09) - First International Workshop on Collective Knowledge Capturing and Representation - CKCaR'09},
  file = {garcia2009preliminary.pdf:garcia2009preliminary.pdf:PDF},
  groups = {public},
  interhash = {5da3fa037c8f1bc0b4a6255a46e08077},
  intrahash = {dfe0fee496a65763bcfae4070ffcf47e},
  month = sep,
  timestamp = {2011-02-17 10:59:45},
  title = {Preliminary Results in Tag Disambiguation using {DBpedia}},
  url = {http://eprints.ecs.soton.ac.uk/17792/},
  username = {dbenz},
  year = 2009
}

@article{garciasilva2011review,
  abstract = {This paper describes and compares the most relevant approaches for associating tags with semantics in order to make explicit the meaning of those tags. We identify a common set of steps that are usually considered across all these approaches and frame our descriptions according to them, providing a unified view of how each approach tackles the different problems that appear during the semantic association process. Furthermore, we provide some recommendations on (a) how and when to use each of the approaches according to the characteristics of the data source, and (b) how to improve results by leveraging the strengths of the different approaches.},
  author = {Garcia-Silva, Andres and Corcho, Oscar and Alani, Harith and Gomez-Perez, Asuncion},
  file = {garciasilva2011review.pdf:garciasilva2011review.pdf:PDF},
  groups = {public},
  interhash = {ef913839d8ab1f3955a9d05c5ba2fadf},
  intrahash = {42f77eb846bdae1847ea70ca5ba6c9ec},
  journal = {Knowledge Engineering Review},
  month = dec,
  number = 4,
  timestamp = {2011-02-15 03:13:28},
  title = {Review of the state of the art: Discovering and Associating Semantics to Tags in Folksonomies},
  username = {dbenz},
  volume = 26,
  year = 2011
}

@inproceedings{heymann2010tagging,
  abstract = {A fundamental premise of tagging systems is that regular users can organize large collections for browsing and other tasks using uncontrolled vocabularies. Until now, that premise has remained relatively unexamined. Using library data, we test the tagging approach to organizing a collection. We find that tagging systems have three major large scale organizational features: consistency, quality, and completeness. In addition to testing these features, we present results suggesting that users produce tags similar to the topics designed by experts, that paid tagging can effectively supplement tags in a tagging system, and that information integration may be possible across tagging systems.},
  author = {Heymann, Paul and Paepcke, Andreas and Garcia-Molina, Hector},
  booktitle = {WSDM},
  crossref = {conf/wsdm/2010},
  date = {2010-02-18},
  editor = {Davison, Brian D. and Suel, Torsten and Craswell, Nick and Liu, Bing},
  ee = {http://doi.acm.org/10.1145/1718487.1718495},
  file = {:heyman2010tagging.pdf:PDF},
  groups = {public},
  interhash = {d4f72ed57e6b99dbe32e18e218d81ef5},
  intrahash = {12579231cd5449f9a40cba9924975f09},
  isbn = {978-1-60558-889-6},
  pages = {51--60},
  publisher = {ACM},
  timestamp = {2010-04-08 07:27:02},
  title = {Tagging human knowledge},
  url = {http://dblp.uni-trier.de/db/conf/wsdm/wsdm2010.html#HeymannPG10},
  username = {dbenz},
  year = 2010
}

@inproceedings{antonellis2009tagging,
  abstract = {Web search queries capture the information need of search engine users. Search engines store these queries in their logs and analyze them to guide their search results. In this work, we argue that not only a search engine can benefit from data stored in these logs, but also the web users. We first show how clickthrough logs can be collected in a distributed fashion using the http referer field in web server access logs. We then perform a set of experiments to study the information value of search engine queries when treated as "tags" or "labels" for the web pages that both appear as a result and the user actually clicks on. We ask how much extra information these query tags provide for web pages by comparing them to tags from the del.icio.us bookmarking site and to the pagetext. We find that query tags can provide substantially many (on average 250 tags per URL), new tags (on average 125 tags per URL are not present in the pagetext) for a large fraction of the Web.},
  address = {New York, NY, USA},
  author = {Antonellis, Ioannis and Garcia-Molina, Hector and Karim, Jawed},
  booktitle = {WSDM (Late Breaking-Results)},
  crossref = {conf/wsdm/2009},
  date = {2009-03-12},
  editor = {Baeza-Yates, Ricardo A. and Boldi, Paolo and Ribeiro-Neto, Berthier A. and Cambazoglu, Berkant Barla},
  interhash = {d7009d789ebc4efe38749a1078a06086},
  intrahash = {70539954a20f7d03a1f21764ff62c0ff},
  isbn = {978-1-60558-390-7},
  publisher = {ACM},
  title = {Tagging with Queries: How and Why?},
  url = {http://www.wsdm2009.org/wsdm2009_antonellis.pdf},
  year = 2009
}

@techreport{heymann2006collaborative,
  abstract = {Collaborative tagging systems---systems where many casual users annotate objects with free-form strings (tags) of their choosing---have recently emerged as a powerful way to label and organize large collections of data. During our recent investigation into these types of systems, we discovered a simple but remarkably effective algorithm for converting a large corpus of tags annotating objects in a tagging system into a navigable hierarchical taxonomy of tags. We first discuss the algorithm and then present a preliminary model to explain why it is so effective in these types of systems.},
  author = {Heymann, Paul and Garcia-Molina, Hector},
  file = {heymann2006collaborative.pdf:heymann2006collaborative.pdf:PDF},
  groups = {public},
  institution = {Computer Science Department, Stanford University},
  interhash = {d77846b40aadb0e25233cabf905bb93e},
  intrahash = {a6010ad0fef7cb1442298402ebb979b6},
  lastdatemodified = {2007-04-27},
  lastname = {Heymann},
  month = apr,
  own = {own},
  pdf = {heyman06-collaborative.pdf},
  read = {notread},
  timestamp = {2007-05-25 16:05:53},
  title = {Collaborative Creation of Communal Hierarchical Taxonomies in Social Tagging Systems},
  url = {http://dbpubs.stanford.edu:8090/pub/2006-10},
  username = {dbenz},
  year = 2006
}

@inproceedings{ramage2009clustering,
  abstract = {Automatically clustering web pages into semantic groups promises improved search and browsing on the web. In this paper, we demonstrate how user-generated tags from large-scale social bookmarking websites such as del.icio.us can be used as a complementary data source to page text and anchor text for improving automatic clustering of web pages. This paper explores the use of tags in 1) K-means clustering in an extended vector space model that includes tags as well as page text and 2) a novel generative clustering algorithm based on latent Dirichlet allocation that jointly models text and tags. We evaluate the models by comparing their output to an established web directory. We find that the naive inclusion of tagging data improves cluster quality versus page text alone, but a more principled inclusion can substantially improve the quality of all models with a statistically significant absolute F-score increase of 4\%. The generative model outperforms K-means with another 8\% F-score increase.},
  address = {New York, NY, USA},
  author = {Ramage, Daniel and Heymann, Paul and Manning, Christopher D. and Garcia-Molina, Hector},
  booktitle = {WSDM '09: Proceedings of the Second ACM International Conference on Web Search and Data Mining},
  doi = {10.1145/1498759.1498809},
  file = {ramage2009clustering.pdf:ramage2009clustering.pdf:PDF},
  groups = {public},
  interhash = {5595f06f88310ed67fd6fe23f813c69b},
  intrahash = {75c4bad29d7eb4b34f68da27f0353516},
  isbn = {978-1-60558-390-7},
  location = {Barcelona, Spain},
  pages = {54--63},
  publisher = {ACM},
  timestamp = {2009-04-24 10:19:45},
  title = {Clustering the tagged web},
  url = {http://portal.acm.org/citation.cfm?id=1498809},
  username = {dbenz},
  year = 2009
}

@inproceedings{garciasilva2010associating,
  abstract = {Tagging systems are nowadays a common feature in web sites where user-generated content plays an important role. However, the lack of semantics and multilinguality hamper information retrieval process based on folksonomies. In this paper we propose an approach to bring semantics to multilingual folksonomies. This approach includes a sense disambiguation activity and takes advantage from knowledge generated by the masses in the form of articles, redirection and disambiguation links, and translations in Wikipedia. We use DBpedia[2] as semantic resource to define the tag meanings.},
  author = {Garcia-Silva, A. and Corcho, Oscar and Gracia, J.},
  booktitle = {Proceedings of the 17th International Conference on Knowledge Engineering and Knowledge Management (EKAW 2010)},
  file = {garciasilva2010associating.pdf:garciasilva2010associating.pdf:PDF},
  groups = {public},
  interhash = {25554c0d0a05182fc0f5c63004aece2d},
  intrahash = {46622f94e66dd7af1b2f6629840ec25b},
  location = {Lisboa, Portugal},
  timestamp = {2011-02-02 14:29:03},
  title = {Associating Semantics to Multilingual Tags in Folksonomies},
  url = {http://oa.upm.es/5646/},
  username = {dbenz},
  year = 2010
}

@article{octavio1985nonexistence,
  abstract = {We prove that there is no free object over a countable set in the category of complete distributive lattices with homomorphisms preserving binary meets and arbitrary joins.},
  author = {Garcia, Octavio and Nelson, Evelyn},
  doi = {10.1007/BF00582745},
  interhash = {c4a8aeddff294dbdd727c05059ab2050},
  intrahash = {d4ce23539ea3bc784068e93dc70387aa},
  journal = {Order},
  month = dec,
  number = 4,
  pages = {399--403},
  title = {On the nonexistence of free complete distributive lattices},
  url = {http://dx.doi.org/10.1007/BF00582745},
  volume = 1,
  year = 1985
}