% Bibliography database, UTF-8 encoded. One entry per record, one field per line.
% Citation keys and the order of fields within each entry are kept as exported.
% interhash/intrahash are opaque identifiers carried over unchanged; they are
% not standard fields and are ignored by standard bibliography styles.
% Braces inside titles protect acronyms and proper nouns from style recasing.

@article{boerner2012design,
  abstract = {Global maps of science can be used as a reference system to chart career trajectories, the location of emerging research frontiers, or the expertise profiles of institutes or nations. This paper details data preparation, analysis, and layout performed when designing and subsequently updating the UCSD map of science and classification system. The original classification and map use 7.2 million papers and their references from Elsevier’s Scopus (about 15,000 source titles, 2001–2005) and Thomson Reuters’ Web of Science (WoS) Science, Social Science, Arts \& Humanities Citation Indexes (about 9,000 source titles, 2001–2004)–about 16,000 unique source titles. The updated map and classification adds six years (2005–2010) of WoS data and three years (2006–2008) from Scopus to the existing category structure–increasing the number of source titles to about 25,000. To our knowledge, this is the first time that a widely used map of science was updated. A comparison of the original 5-year and the new 10-year maps and classification system show (i) an increase in the total number of journals that can be mapped by 9,409 journals (social sciences had a 80\% increase, humanities a 119\% increase, medical (32\%) and natural science (74\%)), (ii) a simplification of the map by assigning all but five highly interdisciplinary journals to exactly one discipline, (iii) a more even distribution of journals over the 554 subdisciplines and 13 disciplines when calculating the coefficient of variation, and (iv) a better reflection of journal clusters when compared with paper-level citation data. When evaluating the map with a listing of desirable features for maps of science, the updated map is shown to have higher mapping accuracy, easier understandability as fewer journals are multiply classified, and higher usability for the generation of data overlays, among others.},
  author = {Börner, Katy and Klavans, Richard and Patek, Michael and Zoss, Angela M. and Biberstine, Joseph R. and Light, Robert P. and Larivière, Vincent and Boyack, Kevin W.},
  doi = {10.1371/journal.pone.0039464},
  interhash = {c27eeafd6c2d77f7022ce10236e3dd47},
  intrahash = {494fdcbea8cd30a1a04a06aa1696fce6},
  journal = {PLoS ONE},
  month = jul,
  number = 7,
  pages = {e39464},
  publisher = {Public Library of Science},
  title = {Design and Update of a Classification System: The {UCSD} Map of Science},
  url = {http://dx.doi.org/10.1371/journal.pone.0039464},
  volume = 7,
  year = 2012
}

@article{larowe2009scholarly,
  abstract = {The Scholarly Database aims to serve researchers and practitioners interested in the analysis, modelling, and visualization of large-scale data sets. A specific focus of this database is to support macro-evolutionary studies of science and to communicate findings via knowledge-domain visualizations. Currently, the database provides access to about 18 million publications, patents, and grants. About 90\% of the publications are available in full text. Except for some datasets with restricted access conditions, the data can be retrieved in raw or pre-processed formats using either a web-based or a relational database client. This paper motivates the need for the database from the perspective of bibliometric/scientometric research. It explains the database design, setup, etc., and reports the temporal, geographical, and topic coverage of data sets currently served via the database. Planned work and the potential for this database to become a global testbed for information science research are discussed at the end of the paper.},
  author = {La Rowe, Gavin and Ambre, Sumeet and Burgoon, John and Ke, Weimao and Börner, Katy},
  doi = {10.1007/s11192-009-0414-2},
  interhash = {1819f263b0ea1b99ec15d0c22b38207e},
  intrahash = {c24611ec1f2efbdcf7f5b26d49af320e},
  issn = {0138-9130},
  journal = {Scientometrics},
  language = {English},
  number = 2,
  pages = {219--234},
  publisher = {Springer Netherlands},
  title = {The {Scholarly Database} and its utility for scientometrics research},
  url = {http://dx.doi.org/10.1007/s11192-009-0414-2},
  volume = 79,
  year = 2009
}

@article{dunne2012rapid,
  abstract = {Keeping up with rapidly growing research fields, especially when there are multiple interdisciplinary sources, requires substantial effort for researchers, program managers, or venture capital investors. Current theories and tools are directed at finding a paper or website, not gaining an understanding of the key papers, authors, controversies, and hypotheses. This report presents an effort to integrate statistics, text analytics, and visualization in a multiple coordinated window environment that supports exploration. Our prototype system, Action Science Explorer (ASE), provides an environment for demonstrating principles of coordination and conducting iterative usability tests of them with interested and knowledgeable users. We developed an understanding of the value of reference management, statistics, citation text extraction, natural language summarization for single and multiple documents, filters to interactively select key papers, and network visualization to see citation patterns and identify clusters. A three-phase usability study guided our revisions to ASE and led us to improve the testing methods.},
  author = {Dunne, Cody and Shneiderman, Ben and Gove, Robert and Klavans, Judith and Dorr, Bonnie},
  doi = {10.1002/asi.22652},
  interhash = {f031d712f64663242af6b6ec95b74f48},
  intrahash = {045df67628ff0ae9b75bb1ecf915d025},
  issn = {1532-2890},
  journal = {Journal of the American Society for Information Science and Technology},
  number = 12,
  pages = {2351--2369},
  title = {Rapid understanding of scientific paper collections: Integrating statistics, text analytics, and visualization},
  url = {http://dx.doi.org/10.1002/asi.22652},
  volume = 63,
  year = 2012
}

% Editor is given in "Last, First" form; German nouns in the title are braced
% so that sentence-casing styles do not lowercase them.
@incollection{pieper2009wissenschaftliche,
  abstract = {Dieser Beitrag untersucht, in welchem Umfang Dokumente von Dokumentenservern wissenschaftlicher Institutionen in den allgemeinen Suchmaschinen Google und Yahoo nachgewiesen sind und inwieweit wissenschaftliche Suchmaschinen für die Suche nach solchen Dokumenten besser geeignet sind. Dazu werden die fünf Suchmaschinen BASE, Google Scholar, OAIster, Scientific Commons und Scirus überblickartig beschrieben und miteinander verglichen. Hauptaugenmerk wird dabei auf die unterschiedlichen Inhalte, Suchfunktionen und Ausgabemöglichkeiten gelegt, mit Hilfe eines Retrievaltests wird speziell die Leistungsfähigkeit der Suchmaschinen beim Auffinden von Dokumenten, deren Volltexte im Sinne des Open Access direkt und ohne Beschränkungen aufrufbar sind, untersucht.},
  author = {Pieper, Dirk and Wolf, Sebastian},
  booktitle = {Handbuch Internet-Suchmaschinen: Nutzerorientierung in Wissenschaft und Praxis},
  editor = {Lewandowski, Dirk},
  interhash = {b915fb45a9a6dc3499247e76992c7897},
  intrahash = {1f997db426731303690c9bb962f1c158},
  pages = {356--374},
  publisher = {Akademische Verlagsgesellschaft AKA},
  title = {Wissenschaftliche {Dokumente} in {Suchmaschinen}},
  url = {http://eprints.rclis.org/12746/},
  year = 2009
}

% The last author is a group, so it is double-braced to be kept as one name.
@inproceedings{krafft2010enabling,
  abstract = {The VIVO project is creating an open, Semantic Web-based network of institutional ontology-driven databases to enable national discovery, networking, and collaboration via information sharing about researchers and their activities. The project has been funded by NIH to implement VIVO at the University of Florida, Cornell University, and Indiana University Bloomington together with four other partner institutions. Working with the Semantic Web/Linked Open Data community, the project will pilot the development of common ontologies, integration with institutional information sources and authentication, and national discovery and exploration of networks of researchers. Building on technology developed over the last five years at Cornell University, VIVO supports the flexible description and interrelation of people, organizations, activities, projects, publications, affiliations, and other entities and properties. VIVO itself is an open source Java application built on W3C Semantic Web standards, including RDF, OWL, and SPARQL. To create researcher profiles, VIVO draws on authoritative information from institutional databases, external data sources such as PubMed, and information provided directly by researchers themselves. While the NIH-funded project focuses on biomedical research, the current Cornell implementation of VIVO supports the full range of disciplines across the university, from music to mechanical engineering to management. There are many ways a person's expertise may be discovered, through grants, presentations, courses and news releases, as well as through research statements or publications listed on their profile---resulting in the creation of implicit groups or networks of people based on a number of pre-identified, shared characteristics. In addition to formal authoritative information and relationships, VIVO can also support the creation of personal work groups and associated properties to represent the informal relationships evolving around collaboration.},
  author = {Krafft, Dean B. and Cappadona, Nicholas A. and Caruso, Brian and Corson-Rikert, Jon and Devare, Medha and Lowe, Brian J. and {VIVO Collaboration}},
  booktitle = {WebSci10: Extending the Frontiers of Society On-Line},
  interhash = {be9a22c8b28fcf00dc26025b5b127956},
  intrahash = {87a568555fcc35532e9384337c1ce68a},
  title = {{VIVO}: Enabling National Networking of Scientists},
  url = {http://journal.webscience.org/316/},
  year = 2010
}

@article{bechhofer2013linked,
  abstract = {Scientific data represents a significant portion of the linked open data cloud and scientists stand to benefit from the data fusion capability this will afford. Publishing linked data into the cloud, however, does not ensure the required reusability. Publishing has requirements of provenance, quality, credit, attribution and methods to provide the reproducibility that enables validation of results. In this paper we make the case for a scientific data publication model on top of linked data and introduce the notion of Research Objects as first class citizens for sharing and publishing.},
  author = {Bechhofer, Sean and Buchan, Iain and De Roure, David and Missier, Paolo and Ainsworth, John and Bhagat, Jiten and Couch, Philip and Cruickshank, Don and Delderfield, Mark and Dunlop, Ian and Gamble, Matthew and Michaelides, Danius and Owen, Stuart and Newman, David and Sufi, Shoaib and Goble, Carole},
  doi = {10.1016/j.future.2011.08.004},
  interhash = {8df8b7069a622aa2eae6d74e5fdc0a6b},
  intrahash = {f500b67a045765125183e23c827991d2},
  issn = {0167-739X},
  journal = {Future Generation Computer Systems},
  number = 2,
  pages = {599--611},
  title = {Why linked data is not enough for scientists},
  url = {http://www.sciencedirect.com/science/article/pii/S0167739X11001439},
  volume = 29,
  year = 2013
}

@article{sun2013social,
  abstract = {The birth and decline of disciplines are critical to science and society. How do scientific disciplines emerge? No quantitative model to date allows us to validate competing theories on the different roles of endogenous processes, such as social collaborations, and exogenous events, such as scientific discoveries. Here we propose an agent-based model in which the evolution of disciplines is guided mainly by social interactions among agents representing scientists. Disciplines emerge from splitting and merging of social communities in a collaboration network. We find that this social model can account for a number of stylized facts about the relationships between disciplines, scholars, and publications. These results provide strong quantitative support for the key role of social interactions in shaping the dynamics of science. While several “science of science” theories exist, this is the first account for the emergence of disciplines that is validated on the basis of empirical data.},
  author = {Sun, Xiaoling and Kaur, Jasleen and Milojevic, Stasa and Flammini, Alessandro and Menczer, Filippo},
  doi = {10.1038/srep01069},
  interhash = {5cd31392e997555d78596f962044f84b},
  intrahash = {721dcd5644cca27fd50d8e6ffd667056},
  journal = {Scientific Reports},
  month = jan,
  publisher = {Macmillan Publishers Limited},
  title = {Social Dynamics of Science},
  url = {http://dx.doi.org/10.1038/srep01069},
  volume = 3,
  year = 2013
}

@article{aguillo2009measuring,
  abstract = {Purpose – The purpose of this paper is to provide an alternative, although complementary, system for the evaluation of the scholarly activities of academic organizations, scholars and researchers, based on web indicators, in order to speed up the change of paradigm in scholarly communication towards a new fully electronic twenty-first century model. Design/methodology/approach – In order to achieve these goals, a new set of web indicators has been introduced, obtained mainly from data gathered from search engines, the new mediators of scholarly communication. Findings – It was found that three large groups of indicators are feasible to obtain and relevant for evaluation purposes: activity (web publication); impact (visibility) and usage (visits and visitors). As a proof of concept, a Ranking Web of Universities has been built with Webometrics data. There are two relevant findings: ranking results are similar to those obtained by other bibliometric-based rankings; and there is a concerning digital divide between North American and European universities, which appear in lower positions when compared with their USA and Canada counterparts. Research limitations/implications – Cybermetrics is still an emerging discipline, so new developments should be expected when more empirical data become available. Practical implications – The proposed approach suggests the publication of truly electronic journals, rather than digital versions of printed articles. Additional materials, such as raw data and multimedia files, should be included along with other relevant information arising from more informal activities. These repositories should be Open Access, available as part of the public web, indexed by the main commercial search engines. It is expected that these actions could generate larger web-based audiences, reduce the costs of publication and access and allow third parties to take advantage of the knowledge generated, without sacrificing peer review, which should be extended (pre- and post-) and expanded (closed and open). Originality/value – A full taxonomy of web indicators is introduced for describing and evaluating research activities, academic organizations and individual scholars and scientists. Previous attempts for building such classification were incomplete and did not take into account feasibility and efficiency.},
  address = {Bingley},
  author = {Aguillo, Isidro},
  doi = {10.1108/073788309},
  interhash = {116e889174766cd359f7e79eb1a36302},
  intrahash = {9734b1f272204883b30dedaa8069fdad},
  issn = {0737-8831},
  journal = {Library Hi Tech},
  number = 4,
  pages = {540--556},
  publisher = {Emerald Group Publishing Limited},
  title = {Measuring the institution's footprint in the web},
  url = {http://www.emeraldinsight.com/journals.htm?articleid=1812469&show=abstract},
  volume = 27,
  year = 2009
}

% Uses the standard misc type, which every bibliography style defines.
@misc{easterbrook2007basics,
  author = {Easterbrook, Steve},
  howpublished = {Presentation slides},
  interhash = {f03b26df87bcede0b52494216ec1c4a1},
  intrahash = {736d303f8938e4635458544e94ad1a39},
  series = {Empirical Research Methods in SE},
  title = {Basics of Doing Research},
  url = {http://www.cs.toronto.edu/~sme/CSC2130/slides/04-basics.pdf},
  year = 2007
}

% "Chue Hong" is a compound surname and is written before the comma as a unit.
% The arXiv identifier is stored in eprint/archiveprefix alongside the
% journal/volume pair that came with the export.
@article{wilson2012practices,
  abstract = {Scientists spend an increasing amount of time building and using software. However, most scientists are never taught how to do this efficiently. As a result, many are unaware of tools and practices that would allow them to write more reliable and maintainable code with less effort. We describe a set of best practices for scientific software development that have solid foundations in research and experience, and that improve scientists' productivity and the reliability of their software.},
  archiveprefix = {arXiv},
  author = {Wilson, Greg and Aruliah, D. A. and Brown, C. Titus and Chue Hong, Neil P. and Davis, Matt and Guy, Richard T. and Haddock, Steven H. D. and Huff, Katy and Mitchell, Ian M. and Plumbley, Mark and Waugh, Ben and White, Ethan P. and Wilson, Paul},
  eprint = {1210.0530},
  interhash = {78f98610c430aa34dc2e161bb8069401},
  intrahash = {e28ce8ccadfa439cce3bcdcb5289b499},
  journal = {CoRR},
  month = oct,
  title = {Best Practices for Scientific Computing},
  url = {http://arxiv.org/abs/1210.0530},
  volume = {abs/1210.0530},
  year = 2012
}

@article{ley2009lessons,
  abstract = {The DBLP Computer Science Bibliography evolved from an early small experimental Web server to a popular service for the computer science community. Many design decisions and details of the public XML-records behind DBLP never were documented. This paper is a review of the evolution of DBLP. The main perspective is data modeling. In DBLP persons play a central role, our discussion of person names may be applicable to many other data bases. All DBLP data are available for your own experiments. You may either download the complete set, or use a simple XML-based API described in an online appendix.},
  acmid = {1687577},
  author = {Ley, Michael},
  interhash = {a75ae2987d55512b7d0731c7a11a1722},
  intrahash = {bb968ff4ba9ae93bc80ba05d16a98ff4},
  issn = {2150-8097},
  issue_date = {August 2009},
  journal = {Proceedings of the VLDB Endowment},
  month = aug,
  number = 2,
  numpages = {8},
  pages = {1493--1500},
  publisher = {VLDB Endowment},
  title = {{DBLP}: some lessons learned},
  url = {http://dl.acm.org/citation.cfm?id=1687553.1687577},
  volume = 2,
  year = 2009
}

% The ampersand in the journal name is escaped because this field is typeset.
@article{ortega2009mapping,
  abstract = {A visual display of the most important universities in the world is the aim of this paper. It shows the topological characteristics and describes the web relationships among universities of different countries and continents. The first 1000 higher education institutions from the Ranking Web of World Universities were selected and their link relationships were obtained from Yahoo! Search. Network graphs and geographical maps were built from the search engine data. Social network analysis techniques were used to analyse and describe the structural properties of the whole of the network and its nodes. The results show that the world-class university network is constituted from national sub-networks that merge in a central core where the principal universities of each country pull their networks toward international link relationships. The United States dominates the world network, and within Europe the British and the German sub-networks stand out.},
  author = {Ortega, Jose Luis and Aguillo, Isidro F.},
  doi = {10.1016/j.ipm.2008.10.001},
  interhash = {1c46addf1c5019aa75a11365a35da757},
  intrahash = {f5960b8cb33d8b10b82abadd17b9a4e7},
  issn = {0306-4573},
  journal = {Information Processing \& Management},
  number = 2,
  pages = {272--279},
  title = {Mapping world-class universities on the web},
  url = {http://www.sciencedirect.com/science/article/pii/S0306457308001015},
  volume = 45,
  year = 2009
}

@article{ortega2008academic,
  abstract = {This paper shows maps of the web presence of the European Higher Education Area (EHEA) on the level of universities using hyperlinks and analyses the topology of the European academic network. Its purpose is to combine methods from Social Network Analysis (SNA) and cybermetric techniques in order to ask for tendencies of integration of the European universities visible in their web presence and the role of different universities in the process of the emergence of an European Research Area. We find as a main result that the European network is set up by the aggregation of well-defined national networks, whereby the German and British networks are dominant. The national networks are connected to each other through outstanding national universities in each country.},
  affiliation = {CINDOC-CSIC Cybermetrics Lab Joaquín Costa, 22 28002 Madrid Spain},
  author = {Ortega, Jose and Aguillo, Isidro and Cothey, Viv and Scharnhorst, Andrea},
  doi = {10.1007/s11192-008-0218-9},
  interhash = {9c9dcd171a50875d82f75f5a12d3c41f},
  intrahash = {22102ef820bb00f432c842edf6e98a1f},
  issn = {0138-9130},
  journal = {Scientometrics},
  keyword = {Computer Science},
  number = 2,
  pages = {295--308},
  publisher = {Akadémiai Kiadó},
  title = {Maps of the academic web in the {European Higher Education Area} — an exploration of visual web indicators},
  url = {http://dx.doi.org/10.1007/s11192-008-0218-9},
  volume = 74,
  year = 2008
}

@article{cho2006stanford,
  abstract = {We describe the design and performance of WebBase, a tool for Web research. The system includes a highly customizable crawler, a repository for collected Web pages, an indexer for both text and link-related page features, and a high-speed content distribution facility. The distribution module enables researchers world-wide to retrieve pages from WebBase, and stream them across the Internet at high speed. The advantage for the researchers is that they need not all crawl the Web before beginning their research. WebBase has been used by scores of research and teaching organizations world-wide, mostly for investigations into Web topology and linguistic content analysis. After describing the system's architecture, we explain our engineering decisions for each of the WebBase components, and present respective performance measurements.},
  acmid = {1149124},
  address = {New York, NY, USA},
  author = {Cho, Junghoo and Garcia-Molina, Hector and Haveliwala, Taher and Lam, Wang and Paepcke, Andreas and Raghavan, Sriram and Wesley, Gary},
  doi = {10.1145/1149121.1149124},
  interhash = {bebbc072ea2dccf4c2b27abf244c1f08},
  intrahash = {3cd21bf8a87619e0489b8da177c9f0b4},
  issn = {1533-5399},
  issue_date = {May 2006},
  journal = {ACM Transactions on Internet Technology},
  month = may,
  number = 2,
  numpages = {34},
  pages = {153--186},
  publisher = {ACM},
  title = {{Stanford} {WebBase} components and applications},
  url = {http://doi.acm.org/10.1145/1149121.1149124},
  volume = 6,
  year = 2006
}

% The double-issue month is built from the month macros so styles can
% localise or abbreviate it.
@article{stirling2012archives,
  abstract = {The Internet has been covered by legal deposit legislation in France since 2006, making web archiving one of the missions of the Bibliothèque nationale de France (BnF). Access to the web archives has been provided in the library on an experimental basis since 2008. In the context of increasing interest in many countries in web archiving and how it may best serve the needs of researchers, especially in the expanding field of Internet studies for social sciences, a qualitative study was performed, based on interviews with potential users of the web archives held at the BnF, and particularly researchers working in various areas related to the Internet. The study aimed to explore their needs in terms of both content and services, and also to analyse different ways of representing the archives, in order to identify ways of increasing their use. While the interest of maintaining the "memory" of the web is obvious to the researchers, they are faced with the difficulty of defining, in what is a seemingly limitless space, meaningful collections of documents. Cultural heritage institutions such as national libraries are perceived as trusted third parties capable of creating rationally-constructed and well-documented collections, but such archives raise certain ethical and methodological questions.},
  author = {Stirling, Peter and Chevallier, Philippe and Illien, Gildas},
  doi = {10.1045/march2012-stirling},
  interhash = {a783191c99a285197525595ebf509bb2},
  intrahash = {4f7840193e7e435ad5dd0003fc93691a},
  issn = {1082-9873},
  journal = {D-Lib Magazine},
  month = mar # "/" # apr,
  number = {3/4},
  title = {Web Archives for Researchers: Representations, Expectations and Potential Uses},
  url = {http://www.dlib.org/dlib/march12/stirling/03stirling.html},
  volume = 18,
  year = 2012
}

@mastersthesis{olson2012cloud,
  abstract = {My thesis describes the design and implementation of systems that empower individuals to help their communities respond to critical situations and to participate in research that helps them understand and improve their environments. People want to help their communities respond to threats such as earthquakes, wildfires, mudslides and hurricanes, and they want to participate in research that helps them understand and improve their environment. “Citizen Science” projects that facilitate this interaction include projects that monitor climate change, water quality and animal habitats. My thesis explores the design and analysis of community-based sense and response systems that enable individuals to participate in critical community activities and scientific research that monitors their environments.},
  author = {Olson, Michael J.},
  interhash = {a9cdee464e76cd5210c13d7f66981e83},
  intrahash = {d9e22a1a5e9404a805aee5cb0fd406c4},
  school = {California Institute of Technology},
  title = {Cloud computing for citizen science},
  type = {Master's thesis},
  url = {http://resolver.caltech.edu/CaltechTHESIS:08232011-122341638},
  year = 2012
}

@article{pham2011development,
  abstract = {In contrast to many other scientific disciplines, computer science considers conference publications. Conferences have the advantage of providing fast publication of papers and of bringing researchers together to present and discuss the paper with peers. Previous work on knowledge mapping focused on the map of all sciences or a particular domain based on ISI published Journal Citation Report (JCR). Although this data cover most of the important journals, it lacks computer science conference and workshop proceedings, which results in an imprecise and incomplete analysis of the computer science knowledge. This paper presents an analysis on the computer science knowledge network constructed from all types of publications, aiming at providing a complete view of computer science research. Based on the combination of two important digital libraries (DBLP and CiteSeerX), we study the knowledge network created at journal/conference level using citation linkage, to identify the development of sub-disciplines. We investigate the collaborative and citation behavior of journals/conferences by analyzing the properties of their co-authorship and citation subgraphs. The paper draws several important conclusions. First, conferences constitute social structures that shape the computer science knowledge. Second, computer science is becoming more interdisciplinary. Third, experts are the key success factor for sustainability of journals/conferences.},
  address = {Wien},
  affiliation = {Information Systems and Database Technology, RWTH Aachen University, Aachen, Ahornstr. 55, 52056 Aachen, Germany},
  author = {Pham, Manh and Klamma, Ralf and Jarke, Matthias},
  doi = {10.1007/s13278-011-0024-x},
  interhash = {193312234ed176aa8be9f35d4d1c4e72},
  intrahash = {8ae08cacda75da80bfa5604cfce48449},
  issn = {1869-5450},
  journal = {Social Network Analysis and Mining},
  keyword = {Computer Science},
  number = 4,
  pages = {321--340},
  publisher = {Springer},
  title = {Development of computer science disciplines: a social network analysis approach},
  url = {http://dx.doi.org/10.1007/s13278-011-0024-x},
  volume = 1,
  year = 2011
}

@article{bernerslee2006creating,
  abstract = {Since its inception, the World Wide Web has changed the ways scientists communicate, collaborate, and educate. There is, however, a growing realization among many researchers that a clear research agenda aimed at understanding the current, evolving, and potential Web is needed. If we want to model the Web; if we want to understand the architectural principles that have provided for its growth; and if we want to be sure that it supports the basic social values of trustworthiness, privacy, and respect for social boundaries, then we must chart out a research agenda that targets the Web as a primary focus of attention.},
  author = {Berners-Lee, Tim and Hall, Wendy and Hendler, James and Shadbolt, Nigel and Weitzner, Daniel J.},
  doi = {10.1126/science.1126902},
  eprint = {http://www.sciencemag.org/content/313/5788/769.full.pdf},
  interhash = {4faeccd1fb26fbc059558be4ce111c6d},
  intrahash = {6daebd9940f9fe0c3a3da39001efa9a0},
  journal = {Science},
  number = 5788,
  pages = {769--771},
  title = {Creating a Science of the {Web}},
  url = {http://www.sciencemag.org/content/313/5788/769.short},
  volume = 313,
  year = 2006
}

@article{borrego2012measuring,
  abstract = {This paper explores the possibility of using data from social bookmarking services to measure the use of information by academic researchers. Social bookmarking data can be used to augment participative methods (e.g. interviews and surveys) and other, non-participative methods (e.g. citation analysis and transaction logs) to measure the use of scholarly information. We use BibSonomy, a free resource-sharing system, as a case study. Results show that published journal articles are by far the most popular type of source bookmarked, followed by conference proceedings and books. Commercial journal publisher platforms are the most popular type of information resource bookmarked, followed by websites, records in databases and digital repositories. Usage of open access information resources is low in comparison with toll access journals. In the case of open access repositories, there is a marked preference for the use of subject-based repositories over institutional repositories. The results are consistent with those observed in related studies based on surveys and citation analysis, confirming the possible use of bookmarking data in studies of information behaviour in academic settings. The main advantages of using social bookmarking data are that is an unobtrusive approach, it captures the reading habits of researchers who are not necessarily authors, and data are readily available. The main limitation is that a significant amount of human resources is required in cleaning and standardizing the data.},
  author = {Borrego, Ángel and Fry, Jenny},
  doi = {10.1177/0165551512438353},
  eprint = {http://jis.sagepub.com/content/38/3/297.full.pdf+html},
  interhash = {71ddfdd5b3d99b1a2986b4ded5e02b3c},
  intrahash = {e5ccbb3378eeb88e7288d8ce59539812},
  journal = {Journal of Information Science},
  number = 3,
  pages = {297--308},
  title = {Measuring researchers' use of scholarly information through social bookmarking data: A case study of {BibSonomy}},
  url = {http://jis.sagepub.com/content/38/3/297.abstract},
  volume = 38,
  year = 2012
}

@inproceedings{lawrence1999indexing,
  abstract = {The web has greatly improved access to scientific literature. However, scientific articles on the web are largely disorganized, with research articles being spread across archive sites, institution sites, journal sites, and researcher homepages. No index covers all of the available literature, and the major web search engines typically do not index the content of Postscript/PDF documents at all. This paper discusses the creation of digital libraries of scientific literature on the web, including the efficient location of articles, full-text indexing of the articles, autonomous citation indexing, information extraction, display of query-sensitive summaries and citation context, hubs and authorities computation, similar document detection, user profiling, distributed error correction, graph analysis, and detection of overlapping documents. The software for the system is available at no cost for non-commercial use.},
  acmid = {319970},
  address = {New York, NY, USA},
  author = {Lawrence, Steve and Bollacker, Kurt and Giles, C. Lee},
  booktitle = {Proceedings of the eighth international conference on Information and knowledge management},
  doi = {10.1145/319950.319970},
  interhash = {09c20b905496b3fba782688018d948b0},
  intrahash = {8f79ea9ca0db12c8bf853dcceed20eb3},
  isbn = {1-58113-146-1},
  location = {Kansas City, Missouri, United States},
  numpages = {8},
  pages = {139--146},
  publisher = {ACM},
  title = {Indexing and retrieval of scientific literature},
  url = {http://doi.acm.org/10.1145/319950.319970},
  year = 1999
}