% Chapter in an edited Springer volume. The book title and the series name
% are kept in separate fields; volume is the number within the series.
@incollection{kluegl2013exploiting,
  author    = {Kluegl, Peter and Toepfer, Martin and Lemmerich, Florian and Hotho, Andreas and Puppe, Frank},
  title     = {Exploiting Structural Consistencies with Stacked Conditional Random Fields},
  booktitle = {Mathematical Methodologies in Pattern Recognition and Machine Learning},
  series    = {Springer Proceedings in Mathematics \& Statistics},
  volume    = {30},
  pages     = {111--125},
  publisher = {Springer},
  year      = {2013},
  interhash = {9ef3f543e4cc9e2b0ef078595f92013b},
  intrahash = {fbaab25e96dd20d96ece9d7fefdc3b4f},
  abstract  = {Conditional Random Fields (CRF) are popular methods for labeling unstructured or textual data. Like many machine learning approaches, these undirected graphical models assume the instances to be independently distributed. However, in real-world applications data is grouped in a natural way, e.g., by its creation context. The instances in each group often share additional structural consistencies. This paper proposes a domain-independent method for exploiting these consistencies by combining two CRFs in a stacked learning framework. We apply rule learning collectively on the predictions of an initial CRF for one context to acquire descriptions of its specific properties. Then, we utilize these descriptions as dynamic and high quality features in an additional (stacked) CRF. The presented approach is evaluated with a real-world dataset for the segmentation of references and achieves a significant reduction of the labeling error.},
}

% Workshop paper; no page numbers or publisher are recorded for it.
@inproceedings{toepfer2011segmentation,
  author    = {Toepfer, Martin and Kluegl, Peter and Hotho, Andreas and Puppe, Frank},
  title     = {Segmentation of References with Skip-Chain Conditional Random Fields for Consistent Label Transitions},
  booktitle = {Workshop Notes of the LWA 2011 - Learning, Knowledge, Adaptation},
  year      = {2011},
  url       = {http://ki.informatik.uni-wuerzburg.de/papers/pkluegl/2011-LWA-SkYp.pdf},
  interhash = {3bd61ad3f9b4f1e221e79ecb3b4cae39},
  intrahash = {b707fa6ddf5b3010827868ecebc60d6a},
}

% Conference paper in the LNAI series. Umlauts in the editor names use
% LaTeX escapes so the entry also sorts correctly under classic BibTeX.
@inproceedings{2010-KI-KHP,
  author    = {Kluegl, Peter and Hotho, Andreas and Puppe, Frank},
  title     = {Local Adaptive Extraction of References},
  booktitle = {KI 2010: Advances in Artificial Intelligence, 33rd Annual German Conference on AI},
  editor    = {Dillmann, R{\"u}diger and Beyerer, J{\"u}rgen and Hanebeck, Uwe D. and Schultz, Tanja},
  series    = {LNAI},
  volume    = {6359},
  pages     = {40--47},
  publisher = {Springer},
  year      = {2010},
  isbn      = {978-3-642-16110-0},
  url       = {http://ki.informatik.uni-wuerzburg.de/papers/pkluegl/2010-KI-LAER.pdf},
  interhash = {b6a5b2a32346b60eac912ee96e681dce},
  intrahash = {174791d9668705cbf0052224694f5366},
  abstract  = {The accurate extraction of scholarly reference information from scientific publications is essential for many useful applications like BibTeX management systems or citation analysis. Automatic extraction methods suffer from the heterogeneity of reference notation, no matter whether the extraction model was handcrafted or learnt from labeled data. However, references of the same paper or journal are usually homogeneous. We exploit this local consistency with a novel approach. Given some initial information from such a reference section, we try to derive generalized patterns. These patterns are used to create a local model of the current document. The local model helps to identify errors and to improve the extracted information incrementally during the extraction process. Our approach is implemented with handcrafted transformation rules working on a meta-level being able to correct the information independent of the applied layout style. The experimental results compete very well with the state of the art methods and show an extremely high performance on consistent reference sections.},
}