@inproceedings{koch-etal-2023-tailored,
title = "A tailored Handwritten-Text-Recognition System for Medieval {L}atin",
author = {Koch, Philipp and
Nu{\~n}ez, Gilary Vera and
Garces Arias, Esteban and
Heumann, Christian and
Sch{\"o}ffel, Matthias and
H{\"a}berlin, Alexander and
Assenmacher, Matthias},
editor = "Anderson, Adam and
Gordin, Shai and
Li, Bin and
Liu, Yudong and
Passarotti, Marco C.",
booktitle = "Proceedings of the Ancient Language Processing Workshop",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.alp-1.12",
pages = "103--110",
abstract = "The Bavarian Academy of Sciences and Humanities aims to digitize the Medieval Latin Dictionary. This dictionary entails record cards referring to lemmas in medieval Latin, a low-resource language. A crucial step of the digitization process is the handwritten text recognition (HTR) of the handwritten lemmas on the record cards. In our work, we introduce an end-to-end pipeline, tailored for the medieval Latin dictionary, for locating, extracting, and transcribing the lemmas. We employ two state-of-the-art image segmentation models to prepare the initial data set for the HTR task. Further, we experiment with different transformer-based models and conduct a set of experiments to explore the capabilities of different combinations of vision encoders with a GPT-2 decoder. Additionally, we also apply extensive data augmentation resulting in a highly competitive model. The best-performing setup achieved a character error rate of 0.015, which is even superior to the commercial Google Cloud Vision model, and shows more stable performance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="koch-etal-2023-tailored">
<titleInfo>
<title>A tailored Handwritten-Text-Recognition System for Medieval Latin</title>
</titleInfo>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gilary</namePart>
<namePart type="given">Vera</namePart>
<namePart type="family">Nuñez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Esteban</namePart>
<namePart type="family">Garces Arias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Heumann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Schöffel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Häberlin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Assenmacher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ancient Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Anderson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shai</namePart>
<namePart type="family">Gordin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bin</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yudong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Bavarian Academy of Sciences and Humanities aims to digitize the Medieval Latin Dictionary. This dictionary entails record cards referring to lemmas in medieval Latin, a low-resource language. A crucial step of the digitization process is the handwritten text recognition (HTR) of the handwritten lemmas on the record cards. In our work, we introduce an end-to-end pipeline, tailored for the medieval Latin dictionary, for locating, extracting, and transcribing the lemmas. We employ two state-of-the-art image segmentation models to prepare the initial data set for the HTR task. Further, we experiment with different transformer-based models and conduct a set of experiments to explore the capabilities of different combinations of vision encoders with a GPT-2 decoder. Additionally, we also apply extensive data augmentation resulting in a highly competitive model. The best-performing setup achieved a character error rate of 0.015, which is even superior to the commercial Google Cloud Vision model, and shows more stable performance.</abstract>
<identifier type="citekey">koch-etal-2023-tailored</identifier>
<location>
<url>https://aclanthology.org/2023.alp-1.12</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>103</start>
<end>110</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A tailored Handwritten-Text-Recognition System for Medieval Latin
%A Koch, Philipp
%A Nuñez, Gilary Vera
%A Garces Arias, Esteban
%A Heumann, Christian
%A Schöffel, Matthias
%A Häberlin, Alexander
%A Assenmacher, Matthias
%Y Anderson, Adam
%Y Gordin, Shai
%Y Li, Bin
%Y Liu, Yudong
%Y Passarotti, Marco C.
%S Proceedings of the Ancient Language Processing Workshop
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F koch-etal-2023-tailored
%X The Bavarian Academy of Sciences and Humanities aims to digitize the Medieval Latin Dictionary. This dictionary entails record cards referring to lemmas in medieval Latin, a low-resource language. A crucial step of the digitization process is the handwritten text recognition (HTR) of the handwritten lemmas on the record cards. In our work, we introduce an end-to-end pipeline, tailored for the medieval Latin dictionary, for locating, extracting, and transcribing the lemmas. We employ two state-of-the-art image segmentation models to prepare the initial data set for the HTR task. Further, we experiment with different transformer-based models and conduct a set of experiments to explore the capabilities of different combinations of vision encoders with a GPT-2 decoder. Additionally, we also apply extensive data augmentation resulting in a highly competitive model. The best-performing setup achieved a character error rate of 0.015, which is even superior to the commercial Google Cloud Vision model, and shows more stable performance.
%U https://aclanthology.org/2023.alp-1.12
%P 103-110
Markdown (Informal)
[A tailored Handwritten-Text-Recognition System for Medieval Latin](https://aclanthology.org/2023.alp-1.12) (Koch et al., ALP-WS 2023)
ACL
- Philipp Koch, Gilary Vera Nuñez, Esteban Garces Arias, Christian Heumann, Matthias Schöffel, Alexander Häberlin, and Matthias Assenmacher. 2023. A tailored Handwritten-Text-Recognition System for Medieval Latin. In Proceedings of the Ancient Language Processing Workshop, pages 103–110, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.