% Bibliography on citation classification, citation prediction and maps of
% science. Layout: one field per line, fields in alphabetical order.
% Non-ASCII characters are written as LaTeX escapes and LaTeX specials inside
% text fields are escaped, so the file is safe for classic BibTeX and Biber.

% The identifier 10012/3712 is a Handle (it is the path of the hdl.handle.net
% URL below), not a DOI, so it is kept in the style-ignored `handle` field.
@phdthesis{radoulov2008exploring,
  abstract  = {Currently, citation indexes used by digital libraries are very limited. They only provide raw citation counts and link scientific articles through their citations. There are more than one type of citations, but citation indexes treat all citations equally. One way to improve citation indexes is to determine the types of citations in scientific articles (background, support, perfunctory reference, etc.) This will enable researchers to query citation indexes more efficiently by locating articles grouped by citation types. For example, it can enable a researcher to locate all background material needed to understand a specific article by locating all "background" citations. Many classification schemes currently exist. However, manual annotation of all existing digital documents is infeasible because of the sheer magnitude of the digital content, which brings about the need for automating the annotating process, but not much research has been done in the area. One of the reasons preventing researchers from researching automated citation classification is the lack of annotated corpora that they can use. This thesis explores automated citation classification. We make several contributions to the field of citation classification. We present a new citation scheme that is easier to work with than most. Also, we present a document acquisition and citation annotation tool that helps with the development of annotated citation corpora. And finally, we present some experiments with automating citation classification.},
  author    = {Radoulov, Radoslav},
  handle    = {10012/3712},
  interhash = {5ced35acf5fa742be8d512d93cd2e788},
  intrahash = {91d354c2837da41669de4e85de44cd81},
  month     = may,
  school    = {School of Computer Science, University of Waterloo},
  title     = {Exploring Automatic Citation Classification},
  url       = {http://hdl.handle.net/10012/3712},
  year      = 2008
}

% The first author's family name is the two-word "Di Marco"; the comma form
% keeps "Di" out of the given-name part.
@inproceedings{marco2004hedging,
  abstract     = {Citations in scientific writing fulfil an important role in creating relationships among mutually relevant articles within a research field. These inter-article relationships reinforce the argumentation structure intrinsic to all scientific writing. Therefore, determining the nature of the exact relationship between a citing and cited paper requires an understanding of the rhetorical relations within the argumentative context in which a citation is placed. To automatically determine these relations, we have suggested that various stylistic and rhetorical cues will be significant. One such cue that we are studying is the use of hedging to modify the affect of a scientific claim. We have previously shown that hedging occurs more frequently in citation contexts than in the text as a whole. With this information we conjecture that hedging is a significant aspect of the rhetorical structure of citation contexts and that the pragmatics of hedges may help in determining the rhetorical purpose of citations.},
  author       = {Di Marco, Chrysanne and Mercer, Robert E.},
  booktitle    = {Proceedings of the {AAAI} Spring Symposium},
  interhash    = {7c4532214d65b2ef202fd2a7d7311901},
  intrahash    = {90a6c053558d98c9bfdbb6b4d6d8d725},
  organization = {AAAI},
  title        = {Hedging in Scientific Articles as a Means of Classifying Citations},
  url          = {http://www.aaai.org/Library/Symposia/Spring/2004/ss04-07-009.php},
  year         = 2004
}

% The venue is a workshop, so it is recorded as booktitle of an inproceedings
% entry; the DOI lives in the doi field.
% NOTE(review): given names are only available as initials here -- expand
% them once confirmed against the paper.
@inproceedings{nanba2000classification,
  abstract  = {We are investigating automatic generation of a review (or survey) article in a specific subject domain. In a research paper, there are passages where the author describes the essence of a cited paper and the differences between the current paper and the cited paper (we call them citing areas). These passages can be considered as a kind of summary of the cited paper from the current author's viewpoint. We can know the state of the art in a specific subject domain from the collection of citing areas. Further, if these citing areas are properly classified and organized, they can act as a kind of a review article. In our previous research, we proposed the automatic extraction of citing areas. Then, with the information in the citing areas, we automatically identified the types of citation relationships that indicate the reasons for citation (we call them citation types). Citation types offer a useful clue for organizing citing areas. In addition, to support writing a review article, it is necessary to take account of the contents of the papers together with the citation links and citation types. In this paper, we propose several methods for classifying papers automatically. We found that our proposed methods BCCT-C, the bibliographic coupling considering only type C citations, which pointed out the problems or gaps in related works, are more effective than others. We also implemented a prototype system to support writing a review article, which is based on our proposed method.},
  author    = {Nanba, H. and Kando, N. and Okumura, M.},
  booktitle = {11th {ASIS} {SIG/CR} Classification Research Workshop},
  doi       = {10.7152/acro.v11i1.12774},
  interhash = {a8fbc36d3ee8de28f65ef2486bb18cd2},
  intrahash = {7a99ee2d1444ae569beb7bee04137e4b},
  pages     = {117--134},
  title     = {Classification of research papers using citation links and citation types: Towards automatic review article generation},
  url       = {http://journals.lib.washington.edu/index.php/acro/article/download/12774/11255},
  year      = 2000
}

% NOTE(review): volume, number and pages are missing for this article.
@article{liu2012fulltext,
  author    = {Liu, Xiaozhong and Zhang, Jinsong and Guo, Chun},
  interhash = {011df26355ad51a88947017fd2791a98},
  intrahash = {f9c6133bf4503003822f99860f864698},
  journal   = {Journal of the American Society for Information Science and Technology},
  title     = {Full-Text Citation Analysis: A New Method to Enhance Scholarly Network},
  url       = {http://discern.uits.iu.edu:8790/publication/Full%20text%20citation.pdf},
  year      = 2012
}

@inproceedings{yan2012better,
  abstract  = {Usually scientists breed research ideas inspired by previous publications, but they are unlikely to follow all publications in the unbounded literature collection. The volume of literature keeps on expanding extremely fast, whilst not all papers contribute equal impact to the academic society. Being aware of potentially influential literature would put one in an advanced position in choosing important research references. Hence, estimation of potential influence is of great significance. We study a challenging problem of identifying potentially influential literature. We examine a set of hypotheses on what are the fundamental characteristics for highly cited papers and find some interesting patterns. Based on these observations, we learn to identify potentially influential literature via Future Influence Prediction (FIP), which aims to estimate the future influence of literature. The system takes a series of features of a particular publication as input and produces as output the estimated citation counts of that article after a given time period. We consider several regression models to formulate the learning process and evaluate their performance based on the coefficient of determination (R2). Experimental results on a real-large data set show a mean average predictive performance of 83.6\% measured in $R^2$. We apply the learned model to the application of bibliography recommendation and obtain prominent performance improvement in terms of Mean Average Precision (MAP).},
  acmid     = {2232831},
  address   = {New York, NY, USA},
  author    = {Yan, Rui and Huang, Congrui and Tang, Jie and Zhang, Yan and Li, Xiaoming},
  booktitle = {Proceedings of the 12th {ACM/IEEE-CS} joint conference on Digital Libraries},
  doi       = {10.1145/2232817.2232831},
  interhash = {85d10c6d37bcbfa057c51acc325a8116},
  intrahash = {9269d2dd9bf4bc8c0e7c668011fcfc1b},
  isbn      = {978-1-4503-1154-0},
  location  = {Washington, DC, USA},
  numpages  = {10},
  pages     = {51--60},
  publisher = {ACM},
  series    = {JCDL '12},
  title     = {To better stand on the shoulder of giants},
  url       = {http://doi.acm.org/10.1145/2232817.2232831},
  year      = 2012
}

@inproceedings{yan2011citation,
  abstract  = {In most of the cases, scientists depend on previous literature which is relevant to their research fields for developing new ideas. However, it is not wise, nor possible, to track all existed publications because the volume of literature collection grows extremely fast. Therefore, researchers generally follow, or cite merely a small proportion of publications which they are interested in. For such a large collection, it is rather interesting to forecast which kind of literature is more likely to attract scientists' response. In this paper, we use the citations as a measurement for the popularity among researchers and study the interesting problem of Citation Count Prediction (CCP) to examine the characteristics for popularity. Estimation of possible popularity is of great significance and is quite challenging. We have utilized several features of fundamental characteristics for those papers that are highly cited and have predicted the popularity degree of each literature in the future. We have implemented a system which takes a series of features of a particular publication as input and produces as output the estimated citation counts of that article after a given time period. We consider several regression models to formulate the learning process and evaluate their performance based on the coefficient of determination (R-square). Experimental results on a real-large data set show that the best predictive model achieves a mean average predictive performance of 0.740 measured in R-square, which significantly outperforms several alternative algorithms.},
  acmid     = {2063757},
  address   = {New York, NY, USA},
  author    = {Yan, Rui and Tang, Jie and Liu, Xiaobing and Shan, Dongdong and Li, Xiaoming},
  booktitle = {Proceedings of the 20th {ACM} international conference on Information and knowledge management},
  doi       = {10.1145/2063576.2063757},
  interhash = {71ec0933a36df3dd21f38285bdf9b1b0},
  intrahash = {b0caabb6e17d9b790d3f13c897330aad},
  isbn      = {978-1-4503-0717-8},
  location  = {Glasgow, Scotland, UK},
  numpages  = {6},
  pages     = {1247--1252},
  publisher = {ACM},
  series    = {CIKM '11},
  title     = {Citation count prediction: learning to estimate future citations for literature},
  url       = {http://dx.doi.org/10.1145/2063576.2063757},
  year      = 2011
}

% The url is the plain DOI resolver link (the slash in the DOI is not
% percent-encoded); {UCSD} is braced so sentence-casing styles keep it.
@article{boerner2012design,
  abstract  = {Global maps of science can be used as a reference system to chart career trajectories, the location of emerging research frontiers, or the expertise profiles of institutes or nations. This paper details data preparation, analysis, and layout performed when designing and subsequently updating the UCSD map of science and classification system. The original classification and map use 7.2 million papers and their references from Elsevier's Scopus (about 15,000 source titles, 2001--2005) and Thomson Reuters' Web of Science (WoS) Science, Social Science, Arts \& Humanities Citation Indexes (about 9,000 source titles, 2001--2004)--about 16,000 unique source titles. The updated map and classification adds six years (2005--2010) of WoS data and three years (2006--2008) from Scopus to the existing category structure--increasing the number of source titles to about 25,000. To our knowledge, this is the first time that a widely used map of science was updated. A comparison of the original 5-year and the new 10-year maps and classification system show (i) an increase in the total number of journals that can be mapped by 9,409 journals (social sciences had a 80\% increase, humanities a 119\% increase, medical (32\%) and natural science (74\%)), (ii) a simplification of the map by assigning all but five highly interdisciplinary journals to exactly one discipline, (iii) a more even distribution of journals over the 554 subdisciplines and 13 disciplines when calculating the coefficient of variation, and (iv) a better reflection of journal clusters when compared with paper-level citation data. When evaluating the map with a listing of desirable features for maps of science, the updated map is shown to have higher mapping accuracy, easier understandability as fewer journals are multiply classified, and higher usability for the generation of data overlays, among others.},
  author    = {B{\"o}rner, Katy and Klavans, Richard and Patek, Michael and Zoss, Angela M. and Biberstine, Joseph R. and Light, Robert P. and Larivi{\`e}re, Vincent and Boyack, Kevin W.},
  doi       = {10.1371/journal.pone.0039464},
  interhash = {c27eeafd6c2d77f7022ce10236e3dd47},
  intrahash = {494fdcbea8cd30a1a04a06aa1696fce6},
  journal   = {PLoS ONE},
  month     = jul,
  number    = 7,
  pages     = {e39464},
  publisher = {Public Library of Science},
  title     = {Design and Update of a Classification System: The {UCSD} Map of Science},
  url       = {http://dx.doi.org/10.1371/journal.pone.0039464},
  volume    = 7,
  year      = 2012
}

% "Scholarly Database" is the name of the system described in the abstract,
% so it is braced to survive sentence-casing styles.
@article{larowe2009scholarly,
  abstract  = {The Scholarly Database aims to serve researchers and practitioners interested in the analysis, modelling, and visualization of large-scale data sets. A specific focus of this database is to support macro-evolutionary studies of science and to communicate findings via knowledge-domain visualizations. Currently, the database provides access to about 18 million publications, patents, and grants. About 90\% of the publications are available in full text. Except for some datasets with restricted access conditions, the data can be retrieved in raw or pre-processed formats using either a web-based or a relational database client. This paper motivates the need for the database from the perspective of bibliometric/scientometric research. It explains the database design, setup, etc., and reports the temporal, geographical, and topic coverage of data sets currently served via the database. Planned work and the potential for this database to become a global testbed for information science research are discussed at the end of the paper.},
  author    = {La Rowe, Gavin and Ambre, Sumeet and Burgoon, John and Ke, Weimao and B{\"o}rner, Katy},
  doi       = {10.1007/s11192-009-0414-2},
  interhash = {1819f263b0ea1b99ec15d0c22b38207e},
  intrahash = {c24611ec1f2efbdcf7f5b26d49af320e},
  issn      = {0138-9130},
  journal   = {Scientometrics},
  language  = {English},
  number    = 2,
  pages     = {219--234},
  publisher = {Springer Netherlands},
  title     = {The {Scholarly Database} and its utility for scientometrics research},
  url       = {http://dx.doi.org/10.1007/s11192-009-0414-2},
  volume    = 79,
  year      = 2009
}

@article{dunne2012rapid,
  abstract  = {Keeping up with rapidly growing research fields, especially when there are multiple interdisciplinary sources, requires substantial effort for researchers, program managers, or venture capital investors. Current theories and tools are directed at finding a paper or website, not gaining an understanding of the key papers, authors, controversies, and hypotheses. This report presents an effort to integrate statistics, text analytics, and visualization in a multiple coordinated window environment that supports exploration. Our prototype system, Action Science Explorer (ASE), provides an environment for demonstrating principles of coordination and conducting iterative usability tests of them with interested and knowledgeable users. We developed an understanding of the value of reference management, statistics, citation text extraction, natural language summarization for single and multiple documents, filters to interactively select key papers, and network visualization to see citation patterns and identify clusters. A three-phase usability study guided our revisions to ASE and led us to improve the testing methods.},
  author    = {Dunne, Cody and Shneiderman, Ben and Gove, Robert and Klavans, Judith and Dorr, Bonnie},
  doi       = {10.1002/asi.22652},
  interhash = {f031d712f64663242af6b6ec95b74f48},
  intrahash = {045df67628ff0ae9b75bb1ecf915d025},
  issn      = {1532-2890},
  journal   = {Journal of the American Society for Information Science and Technology},
  number    = 12,
  pages     = {2351--2369},
  title     = {Rapid understanding of scientific paper collections: Integrating statistics, text analytics, and visualization},
  url       = {http://dx.doi.org/10.1002/asi.22652},
  volume    = 63,
  year      = 2012
}

@inproceedings{mohammad2009using,
  abstract  = {The number of research publications in various disciplines is growing exponentially. Researchers and scientists are increasingly finding themselves in the position of having to quickly understand large amounts of technical material. In this paper we present the first steps in producing an automatically generated, readily consumable, technical survey. Specifically we explore the combination of citation information and summarization techniques. Even though prior work (Teufel et al., 2006) argues that citation text is unsuitable for summarization, we show that in the framework of multi-document survey creation, citation texts can play a crucial role.},
  acmid     = {1620839},
  address   = {Stroudsburg, PA, USA},
  author    = {Mohammad, Saif and Dorr, Bonnie and Egan, Melissa and Hassan, Ahmed and Muthukrishan, Pradeep and Qazvinian, Vahed and Radev, Dragomir and Zajic, David},
  booktitle = {Proceedings of Human Language Technologies: The 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  interhash = {7921a9a20f6780be90327aa4e104214e},
  intrahash = {b6cd30a439667d97f12241836738203c},
  isbn      = {978-1-932432-41-1},
  location  = {Boulder, Colorado},
  numpages  = {9},
  pages     = {584--592},
  publisher = {Association for Computational Linguistics},
  series    = {NAACL '09},
  title     = {Using citations to generate surveys of scientific paradigms},
  url       = {http://dl.acm.org/citation.cfm?id=1620754.1620839},
  year      = 2009
}