@inproceedings{janssen-etal-2025-alignment,
title = "Alignment of Historical Manuscript Transcriptions and Translations",
author = "Janssen, Maarten and
Lendvai, Piroska and
Jouravel, Anna",
editor = "Angelova, Galia and
Kunilovskaya, Maria and
Escribe, Marie and
Mitkov, Ruslan",
booktitle = "Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.ranlp-1.55/",
pages = "462--470",
abstract = "Using an XML-based framework, we compiled a gold standard for alignments in five primary as well as derived texts, related to \textit{De Lepra ad Sistelium} by Methodius Olympius. These comprise diplomatic transcripts, editions, and translations of this work, involving both historical and modern languages. Using the TEITOK corpus platform, we created sentence-level gold standard alignments for our parallel resp. comparable texts, and applied both neural and classical alignment methods (SentenceBERT, Hunalign, Awesome-Align). We evaluated the methods in terms of Alignment Error Rate. We show that for alignment of our historical texts, Hunalign performs better than deep learning based methods."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="janssen-etal-2025-alignment">
<titleInfo>
<title>Alignment of Historical Manuscript Transcriptions and Translations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maarten</namePart>
<namePart type="family">Janssen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Piroska</namePart>
<namePart type="family">Lendvai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Jouravel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
</titleInfo>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Kunilovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Escribe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Using an XML-based framework, we compiled a gold standard for alignments in five primary as well as derived texts, related to De Lepra ad Sistelium by Methodius Olympius. These comprise diplomatic transcripts, editions, and translations of this work, involving both historical and modern languages. Using the TEITOK corpus platform, we created sentence-level gold standard alignments for our parallel resp. comparable texts, and applied both neural and classical alignment methods (SentenceBERT, Hunalign, Awesome-Align). We evaluated the methods in terms of Alignment Error Rate. We show that for alignment of our historical texts, Hunalign performs better than deep learning based methods.</abstract>
<identifier type="citekey">janssen-etal-2025-alignment</identifier>
<location>
<url>https://aclanthology.org/2025.ranlp-1.55/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>462</start>
<end>470</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Alignment of Historical Manuscript Transcriptions and Translations
%A Janssen, Maarten
%A Lendvai, Piroska
%A Jouravel, Anna
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F janssen-etal-2025-alignment
%X Using an XML-based framework, we compiled a gold standard for alignments in five primary as well as derived texts, related to De Lepra ad Sistelium by Methodius Olympius. These comprise diplomatic transcripts, editions, and translations of this work, involving both historical and modern languages. Using the TEITOK corpus platform, we created sentence-level gold standard alignments for our parallel resp. comparable texts, and applied both neural and classical alignment methods (SentenceBERT, Hunalign, Awesome-Align). We evaluated the methods in terms of Alignment Error Rate. We show that for alignment of our historical texts, Hunalign performs better than deep learning based methods.
%U https://aclanthology.org/2025.ranlp-1.55/
%P 462-470
Markdown (Informal)
[Alignment of Historical Manuscript Transcriptions and Translations](https://aclanthology.org/2025.ranlp-1.55/) (Janssen et al., RANLP 2025)
ACL
- Maarten Janssen, Piroska Lendvai, and Anna Jouravel. 2025. Alignment of Historical Manuscript Transcriptions and Translations. In Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era, pages 462–470, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.