% Workshop paper record (ML4AL 2024 proceedings, pages 177--185).
% NOTE(review): the author list contains both "Wallin, John" and "Wallin, John F." --
% possibly the same person entered twice; confirm against the paper before removing either.
@inproceedings{west-etal-2024-deep,
  author    = {West, Graham and
               Swindall, Matthew and
               Brusuelas, James and
               Wallin, John and
               Maltomini, Francesca and
               Gerhardt, Marius and
               D'Angelo, Marzia and
               Wallin, John F.},
  title     = {A deep learning pipeline for the palaeographical dating of ancient {Greek} papyrus fragments},
  editor    = {Pavlopoulos, John and
               Sommerschield, Thea and
               Assael, Yannis and
               Gordin, Shai and
               Cho, Kyunghyun and
               Passarotti, Marco and
               Sprugnoli, Rachele and
               Liu, Yudong and
               Li, Bin and
               Anderson, Adam},
  booktitle = {Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024)},
  month     = aug,
  year      = {2024},
  address   = {Hybrid in Bangkok, Thailand and online},
  publisher = {Association for Computational Linguistics},
  pages     = {177--185},
  doi       = {10.18653/v1/2024.ml4al-1.18},
  url       = {https://aclanthology.org/2024.ml4al-1.18},
  abstract  = {In this paper we present a deep learning pipeline for automatically dating ancient Greek papyrus fragments based solely on fragment images. The overall pipeline consists of several stages, including handwritten text recognition (HTR) to detect and classify characters, filtering and grouping of detected characters, 24 character-level date prediction models, and a fragment-level date prediction model that utilizes the per-character predictions. A new dataset (containing approximately 7,000 fragment images and 778,000 character images) was created by scraping papyrus databases, extracting fragment images with known dates, and running them through our HTR models to obtain labeled character images. Transfer learning was then used to fine-tune separate ResNets to predict dates for individual characters which are then used, in aggregate, to train the fragment-level date prediction model. Experiments show that even though the average accuracies of character-level dating models is low, between 35\%--45\%, the fragment-level model can achieve up to 79\% accuracy in predicting a broad, two-century date range for fragments with many characters. We then discuss the limitations of this approach and outline future work to improve temporal resolution and further testing on additional papyri. This image-based deep learning approach has great potential to assist scholars in the palaeographical analysis and dating of ancient Greek manuscripts.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="west-etal-2024-deep">
<titleInfo>
<title>A deep learning pipeline for the palaeographical dating of ancient Greek papyrus fragments</title>
</titleInfo>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">West</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Swindall</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Brusuelas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Wallin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francesca</namePart>
<namePart type="family">Maltomini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marius</namePart>
<namePart type="family">Gerhardt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marzia</namePart>
<namePart type="family">D’Angelo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John F.</namePart>
<namePart type="family">Wallin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Pavlopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thea</namePart>
<namePart type="family">Sommerschield</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yannis</namePart>
<namePart type="family">Assael</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shai</namePart>
<namePart type="family">Gordin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyunghyun</namePart>
<namePart type="family">Cho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yudong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bin</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Anderson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hybrid in Bangkok, Thailand and online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we present a deep learning pipeline for automatically dating ancient Greek papyrus fragments based solely on fragment images. The overall pipeline consists of several stages, including handwritten text recognition (HTR) to detect and classify characters, filtering and grouping of detected characters, 24 character-level date prediction models, and a fragment-level date prediction model that utilizes the per-character predictions. A new dataset (containing approximately 7,000 fragment images and 778,000 character images) was created by scraping papyrus databases, extracting fragment images with known dates, and running them through our HTR models to obtain labeled character images. Transfer learning was then used to fine-tune separate ResNets to predict dates for individual characters which are then used, in aggregate, to train the fragment-level date prediction model. Experiments show that even though the average accuracies of character-level dating models is low, between 35%-45%, the fragment-level model can achieve up to 79% accuracy in predicting a broad, two-century date range for fragments with many characters. We then discuss the limitations of this approach and outline future work to improve temporal resolution and further testing on additional papyri. This image-based deep learning approach has great potential to assist scholars in the palaeographical analysis and dating of ancient Greek manuscripts.</abstract>
<identifier type="citekey">west-etal-2024-deep</identifier>
<identifier type="doi">10.18653/v1/2024.ml4al-1.18</identifier>
<location>
<url>https://aclanthology.org/2024.ml4al-1.18</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>177</start>
<end>185</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A deep learning pipeline for the palaeographical dating of ancient Greek papyrus fragments
%A West, Graham
%A Swindall, Matthew
%A Brusuelas, James
%A Wallin, John
%A Maltomini, Francesca
%A Gerhardt, Marius
%A D’Angelo, Marzia
%A Wallin, John F.
%Y Pavlopoulos, John
%Y Sommerschield, Thea
%Y Assael, Yannis
%Y Gordin, Shai
%Y Cho, Kyunghyun
%Y Passarotti, Marco
%Y Sprugnoli, Rachele
%Y Liu, Yudong
%Y Li, Bin
%Y Anderson, Adam
%S Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Hybrid in Bangkok, Thailand and online
%F west-etal-2024-deep
%X In this paper we present a deep learning pipeline for automatically dating ancient Greek papyrus fragments based solely on fragment images. The overall pipeline consists of several stages, including handwritten text recognition (HTR) to detect and classify characters, filtering and grouping of detected characters, 24 character-level date prediction models, and a fragment-level date prediction model that utilizes the per-character predictions. A new dataset (containing approximately 7,000 fragment images and 778,000 character images) was created by scraping papyrus databases, extracting fragment images with known dates, and running them through our HTR models to obtain labeled character images. Transfer learning was then used to fine-tune separate ResNets to predict dates for individual characters which are then used, in aggregate, to train the fragment-level date prediction model. Experiments show that even though the average accuracies of character-level dating models is low, between 35%-45%, the fragment-level model can achieve up to 79% accuracy in predicting a broad, two-century date range for fragments with many characters. We then discuss the limitations of this approach and outline future work to improve temporal resolution and further testing on additional papyri. This image-based deep learning approach has great potential to assist scholars in the palaeographical analysis and dating of ancient Greek manuscripts.
%R 10.18653/v1/2024.ml4al-1.18
%U https://aclanthology.org/2024.ml4al-1.18
%U https://doi.org/10.18653/v1/2024.ml4al-1.18
%P 177-185
Markdown (Informal)
[A deep learning pipeline for the palaeographical dating of ancient Greek papyrus fragments](https://aclanthology.org/2024.ml4al-1.18) (West et al., ML4AL-WS 2024)
ACL
- Graham West, Matthew Swindall, James Brusuelas, John Wallin, Francesca Maltomini, Marius Gerhardt, Marzia D’Angelo, and John F. Wallin. 2024. A deep learning pipeline for the palaeographical dating of ancient Greek papyrus fragments. In Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024), pages 177–185, Hybrid in Bangkok, Thailand and online. Association for Computational Linguistics.