@inproceedings{Teufel01task-basedevaluation,
  abstract  = {We present a novel method for task-based evaluation of summaries of scientific articles. The task we propose is a question-answering task, where the questions are about the relatedness of the current paper to prior research. This evaluation method is time-efficient with respect to material preparation and data collection, so that it is possible to test against many different baselines, something that is not usually feasible in evaluations by relevance decision. We use this methodology to evaluate the quality of summaries our system produces. These summaries are designed to describe the contribution of a scientific article in relation to other work. The results show that this type of summary is indeed more useful than the baselines (random sentences, keyword lists and generic author-written summaries), and nearly as useful as the full texts.},
  author    = {Teufel, Simone},
  booktitle = {Workshop on Automatic Summarization, {NAACL}},
  interhash = {ed0c6de01aa6b0a3ef369627eb689cf3},
  intrahash = {b95470c8eae5d8f0372d20215c35f236},
  pages     = {12--21},
  title     = {Task-Based Evaluation of Summary Quality: Describing Relationships between Scientific Papers},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.12.8139},
  year      = {2001},
}

@article{Teufel02summarizingscientific,
  abstract  = {this paper we argue that scientific articles require a different summarization strategy than, for instance, news articles. We propose a strategy which concentrates on the rhetorical status of statements in the article: Material for summaries is selected in such a way that summaries can highlight the new contribution of the source paper and situate it with respect to earlier work. We provide a gold standard for summaries of this kind consisting of a substantial corpus of conference articles in computational linguistics with human judgements of rhetorical status and relevance. We present several experiments measuring our judges' agreement on these annotations. We also present an algorithm which, on the basis of the annotated training material, selects content and classifies it into a fixed set of seven rhetorical categories. The output of this extraction and classification system can be viewed as a single-document summary in its own right; alternatively, it can be used to generate task-oriented and user-tailored summaries designed to give users an overview of a scientific field.},
  author    = {Teufel, Simone and Moens, Marc},
  interhash = {5062ef01775fa6300141a99937d0f1cd},
  intrahash = {7b5e363f72b4351d3afba8f2b369bed6},
  journal   = {Computational Linguistics},
  number    = {4},
  pages     = {409--445},
  title     = {Summarizing Scientific Articles: Experiments with Relevance and Rhetorical Status},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.27.5593},
  volume    = {28},
  year      = {2002},
}