% Bibliography database: publications co-authored by Frank Puppe.
%
% Conventions used in this file:
%   - one field per line, fields in alphabetical order, "=" aligned per entry;
%   - page ranges use an en-dash ("--");
%   - accented characters are written as BibTeX special characters, e.g. {\"u},
%     so the file stays pure ASCII and works with both classic BibTeX and Biber;
%   - titles carry a single pair of delimiting braces; only words that must keep
%     their capitalisation are brace-protected;
%   - interhash / intrahash / ee / session / track / room / presentation_* are
%     non-standard fields kept from the original export; standard styles ignore
%     them.
%
% Two citation keys occurred twice in the original file. The first record of
% each pair keeps its key; the second record was given a new, unique key
% (see the notes directly above those entries).

% NOTE(review): originally typed as an article whose "journal" was the book
% title and the series name run together. Publisher inferred from the series
% name -- confirm.
@incollection{kluegl2013exploiting,
  abstract  = {Conditional Random Fields (CRF) are popular methods for labeling
               unstructured or textual data. Like many machine learning
               approaches, these undirected graphical models assume the
               instances to be independently distributed. However, in
               real-world applications data is grouped in a natural way, e.g.,
               by its creation context. The instances in each group often share
               additional structural consistencies. This paper proposes a
               domain-independent method for exploiting these consistencies by
               combining two CRFs in a stacked learning framework. We apply
               rule learning collectively on the predictions of an initial CRF
               for one context to acquire descriptions of its specific
               properties. Then, we utilize these descriptions as dynamic and
               high quality features in an additional (stacked) CRF. The
               presented approach is evaluated with a real-world dataset for
               the segmentation of references and achieves a significant
               reduction of the labeling error.},
  author    = {Kluegl, Peter and Toepfer, Martin and Lemmerich, Florian and Hotho, Andreas and Puppe, Frank},
  booktitle = {Mathematical Methodologies in Pattern Recognition and Machine Learning},
  interhash = {9ef3f543e4cc9e2b0ef078595f92013b},
  intrahash = {fbaab25e96dd20d96ece9d7fefdc3b4f},
  pages     = {111--125},
  publisher = {Springer},
  series    = {Springer Proceedings in Mathematics \& Statistics},
  title     = {Exploiting Structural Consistencies with Stacked Conditional Random Fields},
  volume    = 30,
  year      = 2013
}

@incollection{ABP:11,
  author    = {Atzmueller, Martin and Beer, Stephanie and Puppe, Frank},
  booktitle = {Collaboration and the Semantic Web: Social Networks, Knowledge Networks, and Knowledge Resources},
  editor    = {Br{\"u}ggemann, Stefan and d'Amato, Claudia},
  interhash = {9c0d3f10e985d8654d8a2eae39121ef2},
  intrahash = {781410de8780f9033aae08162cbdf073},
  pages     = {149--167},
  publisher = {IGI Global},
  title     = {Data Mining, Validation and Collaborative Knowledge Capture},
  year      = 2012
}

% The crossref target of the next entry is not defined in this part of the
% file; classic BibTeX requires the parent entry to appear after this one.
@inproceedings{conf/pkdd/KluglTLHP12,
  author    = {Kl{\"u}gl, Peter and Toepfer, Martin and Lemmerich, Florian and Hotho, Andreas and Puppe, Frank},
  booktitle = {ECML/PKDD (1)},
  crossref  = {conf/pkdd/2012-1},
  doi       = {10.1007/978-3-642-33460-3_52},
  editor    = {Flach, Peter A. and De Bie, Tijl and Cristianini, Nello},
  ee        = {http://dx.doi.org/10.1007/978-3-642-33460-3_52},
  interhash = {ccd3a716939562b7e91ecb057ae7df2d},
  intrahash = {afd38525dbe0f52db7389e03aa7df1f7},
  isbn      = {978-3-642-33459-7},
  pages     = {728--743},
  publisher = {Springer},
  series    = {Lecture Notes in Computer Science},
  title     = {Collective Information Extraction with Context-Specific Consistencies},
  url       = {http://dblp.uni-trier.de/db/conf/pkdd/pkdd2012-1.html#KluglTLHP12},
  volume    = 7523,
  year      = 2012
}

@inproceedings{kluegl2012stacked,
  abstract  = {Conditional Random Fields CRF are popular methods for labeling
               unstructured or textual data. Like many machine learning
               approaches these undirected graphical models assume the
               instances to be independently distributed. However, in real
               world applications data is grouped in a natural way, e.g., by
               its creation context. The instances in each group often share
               additional structural consistencies. This paper proposes a
               domain-independent method for exploiting these consistencies by
               combining two CRFs in a stacked learning framework. The approach
               incorporates three successive steps of inference: First, an
               initial CRF processes single instances as usual. Next, we apply
               rule learning collectively on all labeled outputs of one context
               to acquire descriptions of its specific properties. Finally, we
               utilize these descriptions as dynamic and high quality features
               in an additional stacked CRF. The presented approach is
               evaluated with a real-world dataset for the segmentation of
               references and achieves a significant reduction of the labeling
               error.},
  address   = {Vilamoura, Algarve, Portugal},
  author    = {Kl{\"u}gl, Peter and Toepfer, Martin and Lemmerich, Florian and Hotho, Andreas and Puppe, Frank},
  booktitle = {Proceedings of 1st International Conference on Pattern Recognition Applications and Methods ICPRAM},
  editor    = {Carmona, Pedro Latorre and S{\'a}nchez, J. Salvador and Fred, Ana},
  interhash = {74969e59c5637d192021e35bbd02bece},
  intrahash = {7920d13d4fce68bb9a4947585083986e},
  pages     = {240--248},
  publisher = {SciTePress},
  title     = {Stacked Conditional Random Fields Exploiting Structural Consistencies},
  url       = {http://ki.informatik.uni-wuerzburg.de/papers/pkluegl/2012-ICPRAM-StackedCRF.pdf},
  year      = 2012
}

@inproceedings{toepfer2011segmentation,
  author    = {Toepfer, Martin and Kluegl, Peter and Hotho, Andreas and Puppe, Frank},
  booktitle = {Workshop Notes of the LWA 2011 - Learning, Knowledge, Adaptation},
  interhash = {3bd61ad3f9b4f1e221e79ecb3b4cae39},
  intrahash = {b707fa6ddf5b3010827868ecebc60d6a},
  title     = {Segmentation of References with Skip-Chain Conditional Random Fields for Consistent Label Transitions},
  url       = {http://ki.informatik.uni-wuerzburg.de/papers/pkluegl/2011-LWA-SkYp.pdf},
  year      = 2011
}

% NOTE(review): this record originally reused the key ABP:11 (repeated-entry
% error; the record was dropped). It looks like an older record of the same
% book chapter (year 2011, no pages). Renamed so both records stay addressable;
% consider merging the two once the correct year is confirmed.
@incollection{ABP:11:2011,
  author    = {Atzmueller, Martin and Beer, Stephanie and Puppe, Frank},
  booktitle = {Collaboration and the Semantic Web: Social Networks, Knowledge Networks and Knowledge Resources},
  editor    = {Br{\"u}ggemann, Stefan and d'Amato, Claudia},
  interhash = {83cf9bd4264c938fa454c381a69e9880},
  intrahash = {63df2eaae8a6df990b0a56379531a242},
  publisher = {IGI Global},
  title     = {Data Mining, Validation and Collaborative Knowledge Capture},
  year      = 2011
}

@inproceedings{2010-KI-KHP,
  abstract  = {The accurate extraction of scholarly reference information from
               scientific publications is essential for many useful
               applications like BibTeX management systems or citation
               analysis. Automatic extraction methods suffer from the
               heterogeneity of reference notation, no matter whether the
               extraction model was handcrafted or learnt from labeled data.
               However, references of the same paper or journal are usually
               homogeneous. We exploit this local consistency with a novel
               approach. Given some initial information from such a reference
               section, we try to derive generalized patterns. These patterns
               are used to create a local model of the current document. The
               local model helps to identify errors and to improve the
               extracted information incrementally during the extraction
               process. Our approach is implemented with handcrafted
               transformation rules working on a meta-level being able to
               correct the information independent of the applied layout style.
               The experimental results compete very well with the state of the
               art methods and show an extremely high performance on consistent
               reference sections.},
  author    = {Kluegl, Peter and Hotho, Andreas and Puppe, Frank},
  booktitle = {KI 2010: Advances in Artificial Intelligence, 33rd Annual German Conference on AI},
  editor    = {Dillmann, R{\"u}diger and Beyerer, J{\"u}rgen and Hanebeck, Uwe D. and Schultz, Tanja},
  interhash = {b6a5b2a32346b60eac912ee96e681dce},
  intrahash = {174791d9668705cbf0052224694f5366},
  isbn      = {978-3-642-16110-0},
  pages     = {40--47},
  publisher = {Springer},
  series    = {LNAI},
  title     = {Local Adaptive Extraction of References},
  url       = {http://ki.informatik.uni-wuerzburg.de/papers/pkluegl/2010-KI-LAER.pdf},
  volume    = 6359,
  year      = 2010
}

% The crossref target of the next entry is not defined in this part of the
% file; classic BibTeX requires the parent entry to appear after this one.
@inproceedings{kdml21,
  abstract           = {The accurate extraction of bibliographic information
                        from scientific publications is an active field of
                        research. Machine learning and sequence labeling
                        approaches like Conditional Random Fields (CRF) are
                        often applied for this reference extraction task, but
                        still suffer from the ambiguity of reference notation.
                        Reference sections apply a predefined style guide and
                        contain only homogeneous references. Therefore, other
                        references of the same paper or journal often provide
                        evidence how the fields of a reference are correctly
                        labeled. We propose a novel approach that exploits the
                        similarities within a document. Our process model uses
                        information of unlabeled documents directly during the
                        extraction task in order to automatically adapt to the
                        perceived style guide. This is implemented by changing
                        the manifestation of the features for the applied CRF.
                        The experimental results show considerable improvements
                        compared to the common approach. We achieve an average
                        F1 score of 96.7\% and an instance accuracy of 85.4\%
                        on the test data set.},
  address            = {Kassel, Germany},
  author             = {Toepfer, Martin and Kluegl, Peter and Hotho, Andreas and Puppe, Frank},
  booktitle          = {Proceedings of LWA2010 - Workshop-Woche: Lernen, Wissen {\&} Adaptivitaet},
  crossref           = {lwa2010},
  editor             = {Atzm{\"u}ller, Martin and Benz, Dominik and Hotho, Andreas and Stumme, Gerd},
  interhash          = {d8f45281363701bfe7f979b1e13ee269},
  intrahash          = {37242cd584805b2e4cea0c486008889d},
  presentation_end   = {2010-10-05 16:45:00},
  presentation_start = {2010-10-05 16:22:30},
  room               = {0446},
  session            = {kdml2},
  title              = {Conditional Random Fields For Local Adaptive Reference Extraction},
  track              = {kdml},
  url                = {http://www.kde.cs.uni-kassel.de/conf/lwa10/papers/kdml21.pdf},
  year               = 2010
}

@inproceedings{BAP:02,
  author    = {Baumeister, Joachim and Atzmueller, Martin and Puppe, Frank},
  booktitle = {Advances in Case-Based Reasoning},
  interhash = {bfae82181f17ea7b88fd9d52d9bdd931},
  intrahash = {032258a6dda14093880c3c8bff63ac62},
  note      = {Proc. 6th European Conference on Case-Based Reasoning (ECCBR 2002)},
  pages     = {28--42},
  series    = {LNAI},
  title     = {Inductive Learning for Case-Based Diagnosis with Multiple Faults},
  volume    = 2416,
  year      = 2002
}

@inproceedings{ABHMLP:08,
  address   = {Erfurt},
  author    = {Atzmueller, Martin and Beer, Stephanie and H{\"o}rnlein, Alexander and Melcher, Ralf and L{\"u}hrs, Hardi and Puppe, Frank},
  booktitle = {Proc. 1st European Workshop on Design, Evaluation and Refinement of Intelligent Systems},
  interhash = {f059423faf565033982e54c1a2766b9e},
  intrahash = {741787b841afcb8949b850185b56aeee},
  title     = {Design and Implementation of a Data Warehouse for Quality Management, System Evaluation and Knowledge Discovery in the Medical Domain},
  year      = 2008
}

@inproceedings{AP:06a,
  author    = {Atzmueller, Martin and Puppe, Frank},
  booktitle = {Proc. LWA 2006 (KDML Special Track), Hildesheimer Informatik Berichte},
  interhash = {5b8eb3d160e202fc56770062bdd167bd},
  intrahash = {5e24fcd6163c894b62fb4ec1c1a549b7},
  publisher = {University of Hildesheim},
  title     = {Case-Based Characterization and Analysis of Subgroup Patterns},
  year      = 2006
}

% "optpublisher" is not a standard field name, so styles ignore it; it is kept
% exactly as found.
@inproceedings{AP:07a,
  address      = {Wuerzburg, Germany},
  author       = {Atzmueller, Martin and Puppe, Frank},
  booktitle    = {Proc. 18th International Conference on Applications of Declarative Programming and Knowledge Management (INAP 2007)},
  interhash    = {5cd62702810c3ac276ad6d138bb92e76},
  intrahash    = {f14b309aad99d095c505fe68916b13c6},
  optpublisher = {University of Wuerzburg},
  title        = {Causal Subgroup Analysis for Detecting Confounding},
  year         = 2007
}

@inproceedings{ABHRP:05,
  author    = {Atzmueller, Martin and Baumeister, Joachim and Hemsing, Achim and Richter, Ernst-J{\"u}rgen and Puppe, Frank},
  booktitle = {Proc. 10th Conference on Artificial Intelligence in Medicine (AIME 05)},
  interhash = {719a62b3f8634acabb84a916be0f7208},
  intrahash = {2ae8553c5fc000870fcaaabf3252c675},
  pages     = {453--462},
  series    = {LNAI},
  title     = {Subgroup Mining for Interactive Knowledge Refinement},
  volume    = 3581,
  year      = 2005
}

@article{APB:09,
  author    = {Atzmueller, Martin and Puppe, Frank and Buscher, Hans-Peter},
  interhash = {7035d39f90827971b2b5dbd63ec950bf},
  intrahash = {818608d57145cfc07a75e9608d059d23},
  journal   = {International Journal on Artificial Intelligence Tools (IJAIT)},
  number    = 1,
  pages     = {1--18},
  title     = {A Semi-Automatic Approach for Confounding-Aware Subgroup Discovery},
  volume    = 18,
  year      = 2009
}

% The publication-status remark "accepted" was part of booktitle in the
% original record; it now lives in note. The year typo "20008" in the
% conference acronym was corrected to match the year field.
@inproceedings{PABHLB:08,
  author    = {Puppe, Frank and Atzmueller, Martin and Buscher, Georg and Huettig, Matthias and L{\"u}hrs, Hardi and Buscher, Hans-Peter},
  booktitle = {Proc. 18th European Conference on Artificial Intelligence (ECAI 2008)},
  interhash = {0a275f930071c1744c4995bc56b6a323},
  intrahash = {40d6ae61a6394ad9f8d85d3abbb5c416},
  note      = {accepted},
  title     = {Application and Evaluation of a Medical Knowledge-System in Sonography ({SonoConsult})},
  year      = 2008
}

% NOTE(review): this record originally reused the key AP:06a, which is already
% taken by a different paper earlier in the file (repeated-entry error; this
% record was dropped). Renamed to a unique key -- update any citation that was
% meant to point at the SD-Map paper.
@inproceedings{AP:06a:SDMap,
  author    = {Atzmueller, Martin and Puppe, Frank},
  booktitle = {Proc. 10th European Conference on Principles and Practice of Knowledge Discovery in Databases (PKDD 2006)},
  interhash = {7ec9ca827a90b2141c5a1564b9b34110},
  intrahash = {3b78e5ecc6e1374bbcf3cfc9c9abd610},
  pages     = {6--17},
  series    = {LNAI},
  title     = {{SD-Map} -- {A} Fast Algorithm for Exhaustive Subgroup Discovery},
  volume    = 4213,
  year      = 2006
}

@inproceedings{ABKP:07,
  author    = {Atzmueller, Martin and Baumeister, Joachim and Kl{\"u}gl, Peter and Puppe, Frank},
  booktitle = {Proc. 4th International Conference on Knowledge Capture (K-CAP 2007)},
  interhash = {8d02bae2895638184cf72a07a2b61c5a},
  intrahash = {c80a9e45fbee568ab97cdf22292630dd},
  pages     = {31--38},
  publisher = {ACM Press},
  title     = {Rapid Knowledge Capture Using Subgroup Discovery with Incremental Refinement},
  year      = 2007
}

% The publication-status remark "accepted" was part of booktitle in the
% original record; it now lives in note.
@inproceedings{ABP:09,
  author    = {Atzmueller, Martin and Beer, Stephanie and Puppe, Frank},
  booktitle = {Proc. 22nd International Florida Artificial Intelligence Research Society Conference (FLAIRS)},
  interhash = {ab3138620625200cb818970bc1615925},
  intrahash = {5b8c0b2e4bd3d4380591a603b9dccc73},
  note      = {accepted},
  pages     = {372--377},
  publisher = {AAAI Press},
  title     = {A Data Warehouse-Based Approach for Quality Management, Evaluation and Analysis of Intelligent Systems using Subgroup Mining},
  year      = 2009
}

@inproceedings{BAKP:06,
  author    = {Baumeister, Joachim and Atzmueller, Martin and Kluegl, Peter and Puppe, Frank},
  booktitle = {Proc. 19th Intl. Florida Artificial Intelligence Research Society Conference 2006 (FLAIRS-2006)},
  editor    = {Sutcliffe, Geoff and Goebel, Randy},
  interhash = {d53d8f0e0f0dc82097603dc93e9f309d},
  intrahash = {9d824482a6b2d92790310693947d031f},
  pages     = {408--413},
  publisher = {AAAI Press},
  title     = {Conservative and Creative Strategies for the Refinement of Scoring Rules},
  year      = 2006
}

@inproceedings{ABP:05,
  author    = {Atzmueller, Martin and Baumeister, Joachim and Puppe, Frank},
  booktitle = {Proc. 13th Leipziger Informatik-Tage 2005 (LIT 2005)},
  interhash = {99c23144da3bcc421322ccbad9d8e7eb},
  intrahash = {60a2d23f64ff64428d01257551b4156f},
  pages     = {101--106},
  series    = {LNI},
  title     = {Exemplifying Subgroup Mining Results for Interactive Knowledge Refinement},
  year      = 2005
}