@inproceedings{lafferty2001conditional,
  acmid = {655813},
  address = {San Francisco, CA, USA},
  author = {Lafferty, John D. and McCallum, Andrew and Pereira, Fernando C. N.},
  booktitle = {Proceedings of the Eighteenth International Conference on Machine Learning},
  interhash = {574c59001ecc3aa04850e1751d96c137},
  intrahash = {180c5d6097317fa1b19ca8df75341230},
  isbn = {1-55860-778-1},
  numpages = {8},
  pages = {282--289},
  publisher = {Morgan Kaufmann Publishers Inc.},
  title = {Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data},
  url = {http://dl.acm.org/citation.cfm?id=645530.655813},
  year = 2001
}

@inproceedings{kristjansson2004interactive,
  abstract = {Information Extraction methods can be used to automatically "fill-in" database forms from unstructured data such as Web documents or email. State-of-the-art methods have achieved low error rates but invariably make a number of errors. The goal of an interactive information extraction system is to assist the user in filling in database fields while giving the user confidence in the integrity of the data. The user is presented with an interactive interface that allows both the rapid verification of automatic field assignments and the correction of errors. In cases where there are multiple errors, our system takes into account user corrections, and immediately propagates these constraints such that other fields are often corrected automatically. Linear-chain conditional random fields (CRFs) have been shown to perform well for information extraction and other language modelling tasks due to their ability to capture arbitrary, overlapping features of the input in a Markov model. We apply this framework with two extensions: a constrained Viterbi decoding which finds the optimal field assignments consistent with the fields explicitly specified or corrected by the user; and a mechanism for estimating the confidence of each extracted field, so that low-confidence extractions can be highlighted. Both of these mechanisms are incorporated in a novel user interface for form filling that is intuitive and speeds the entry of data, providing a 23% reduction in error due to automated corrections.},
  author = {Kristjansson, Trausti T. and Culotta, Aron and Viola, Paul A. and McCallum, Andrew},
  booktitle = {AAAI},
  editor = {McGuinness, Deborah L. and Ferguson, George},
  interhash = {89fe7fe6ef4c088b10d3b0b0aabeaf46},
  intrahash = {fe6cb1dbef3216852a63a625a30799d6},
  isbn = {0-262-51183-5},
  pages = {412--418},
  publisher = {AAAI Press/The MIT Press},
  title = {Interactive Information Extraction with Constrained Conditional Random Fields},
  url = {http://dblp.uni-trier.de/db/conf/aaai/aaai2004.html#KristjanssonCVM04},
  year = 2004
}

@misc{sutton2010introduction,
  abstract = {Often we wish to predict a large number of variables that depend on each other as well as on other observed variables. Structured prediction methods are essentially a combination of classification and graphical modeling, combining the ability of graphical models to compactly model multivariate data with the ability of classification methods to perform prediction using large sets of input features. This tutorial describes conditional random fields, a popular probabilistic method for structured prediction. CRFs have seen wide application in natural language processing, computer vision, and bioinformatics. We describe methods for inference and parameter estimation for CRFs, including practical issues for implementing large scale CRFs. We do not assume previous knowledge of graphical modeling, so this tutorial is intended to be useful to practitioners in a wide variety of fields.},
  author = {Sutton, Charles and McCallum, Andrew},
  interhash = {05e1b6859124c5bf51c7aafd63f779b0},
  intrahash = {49d8c9beb76a8b88739aa9eece7446ee},
  note = {cite arxiv:1011.4088 Comment: 90 pages},
  title = {An Introduction to Conditional Random Fields},
  url = {http://arxiv.org/abs/1011.4088},
  year = 2010
}
@inproceedings{kluegl2012stacked,
  abstract = {Conditional Random Fields (CRF) are popular methods for labeling unstructured or textual data. Like many machine learning approaches, these undirected graphical models assume the instances to be independently distributed. However, in real world applications data is grouped in a natural way, e.g., by its creation context. The instances in each group often share additional structural consistencies. This paper proposes a domain-independent method for exploiting these consistencies by combining two CRFs in a stacked learning framework. The approach incorporates three successive steps of inference: First, an initial CRF processes single instances as usual. Next, we apply rule learning collectively on all labeled outputs of one context to acquire descriptions of its specific properties. Finally, we utilize these descriptions as dynamic and high quality features in an additional stacked CRF. The presented approach is evaluated with a real-world dataset for the segmentation of references and achieves a significant reduction of the labeling error.},
  address = {Vilamoura, Algarve, Portugal},
  author = {Klügl, Peter and Toepfer, Martin and Lemmerich, Florian and Hotho, Andreas and Puppe, Frank},
  booktitle = {Proceedings of the 1st International Conference on Pattern Recognition Applications and Methods (ICPRAM)},
  editor = {Carmona, Pedro Latorre and Sánchez, J. Salvador and Fred, Ana},
  interhash = {74969e59c5637d192021e35bbd02bece},
  intrahash = {7920d13d4fce68bb9a4947585083986e},
  pages = {240--248},
  publisher = {SciTePress},
  title = {Stacked Conditional Random Fields Exploiting Structural Consistencies},
  url = {http://ki.informatik.uni-wuerzburg.de/papers/pkluegl/2012-ICPRAM-StackedCRF.pdf},
  year = 2012
}
@inproceedings{kdml21,
  abstract = {The accurate extraction of bibliographic information from scientific publications is an active field of research. Machine learning and sequence labeling approaches like Conditional Random Fields (CRF) are often applied for this reference extraction task, but still suffer from the ambiguity of reference notation. Reference sections apply a predefined style guide and contain only homogeneous references. Therefore, other references of the same paper or journal often provide evidence how the fields of a reference are correctly labeled. We propose a novel approach that exploits the similarities within a document. Our process model uses information of unlabeled documents directly during the extraction task in order to automatically adapt to the perceived style guide. This is implemented by changing the manifestation of the features for the applied CRF. The experimental results show considerable improvements compared to the common approach. We achieve an average F1 score of 96.7% and an instance accuracy of 85.4% on the test data set.},
  address = {Kassel, Germany},
  author = {Toepfer, Martin and Kluegl, Peter and Hotho, Andreas and Puppe, Frank},
  booktitle = {Proceedings of LWA2010 - Workshop-Woche: Lernen, Wissen {\&} Adaptivitaet},
  crossref = {lwa2010},
  editor = {Atzmüller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  interhash = {d8f45281363701bfe7f979b1e13ee269},
  intrahash = {37242cd584805b2e4cea0c486008889d},
  presentation_end = {2010-10-05 16:45:00},
  presentation_start = {2010-10-05 16:22:30},
  room = {0446},
  session = {kdml2},
  title = {Conditional Random Fields For Local Adaptive Reference Extraction},
  track = {kdml},
  url = {http://www.kde.cs.uni-kassel.de/conf/lwa10/papers/kdml21.pdf},
  year = 2010
}

@inproceedings{conf/semweb/TangHLL06,
  author = {Tang, Jie and Hong, MingCai and Li, Juan-Zi and Liang, Bangyong},
  booktitle = {International Semantic Web Conference},
  crossref = {conf/semweb/2006},
  date = {2006-11-09},
  editor = {Cruz, Isabel F. and Decker, Stefan and Allemang, Dean and Preist, Chris and Schwabe, Daniel and Mika, Peter and Uschold, Michael and Aroyo, Lora},
  ee = {http://dx.doi.org/10.1007/11926078_46},
  interhash = {0cd79ca123126fe66d0e2f2888222c79},
  intrahash = {e378a25116a480b55e64a919a351f1a7},
  isbn = {3-540-49029-9},
  pages = {640--653},
  publisher = {Springer},
  series = {Lecture Notes in Computer Science},
  title = {Tree-Structured Conditional Random Fields for Semantic Annotation},
  url = {http://dblp.uni-trier.de/db/conf/semweb/iswc2006.html#TangHLL06},
  volume = 4273,
  year = 2006
}

@inproceedings{peng2004accurate,
  author = {Peng, Fuchun and McCallum, Andrew},
  booktitle = {HLT-NAACL},
  interhash = {8f9ef6b359fef3bd08bfed653fe1bb55},
  intrahash = {8d04bc19e470fe4b98e15a27a1e6e7e9},
  pages = {329--336},
  title = {Accurate Information Extraction from Research Papers using Conditional Random Fields},
  url = {http://acl.ldc.upenn.edu/hlt-naacl2004/main/pdf/176_Paper.pdf},
  year = 2004
}