@inproceedings{wannaz-miyagawa-2024-assessing,
title = "Assessing Large Language Models in Translating {C}optic and {A}ncient {G}reek Ostraca",
author = "Wannaz, Audric-Charles and
Miyagawa, So",
editor = {H{\"a}m{\"a}l{\"a}inen, Mika and
{\"O}hman, Emily and
Miyagawa, So and
Alnajjar, Khalid and
Bizzoni, Yuri},
booktitle = "Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities",
month = nov,
year = "2024",
address = "Miami, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.nlp4dh-1.44",
pages = "463--471",
abstract = "The advent of Large Language Models (LLMs) substantially raised the quality and lowered the cost of Machine Translation (MT). Can scholars working with ancient languages draw benefits from this new technology? More specifically, can current MT facilitate multilingual digital papyrology? To answer this question, we evaluate 9 LLMs in the task of MT with 4 Coptic and 4 Ancient Greek ostraca into English using 6 NLP metrics. We argue that some models have already reached a performance apt to assist human experts. As can be expected from the difference in training corpus size, all models seem to perform better with Ancient Greek than with Coptic, where hallucinations are markedly more common. In the Coptic texts, the specialised Coptic Translator (CT) competes closely with Claude 3 Opus for the rank of most promising tool, while Claude 3 Opus and GPT-4o compete for the same position in the Ancient Greek texts. We argue that MT now substantially heightens the incentive to work on multilingual corpora. This could have a positive and long-lasting effect on Classics and Egyptology and help reduce the historical bias in translation availability. In closing, we reflect upon the need to meet AI-generated translations with an adequate critical stance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wannaz-miyagawa-2024-assessing">
    <titleInfo>
      <title>Assessing Large Language Models in Translating Coptic and Ancient Greek Ostraca</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Audric-Charles</namePart>
      <namePart type="family">Wannaz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">So</namePart>
      <namePart type="family">Miyagawa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Mika</namePart>
        <namePart type="family">Hämäläinen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Emily</namePart>
        <namePart type="family">Öhman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">So</namePart>
        <namePart type="family">Miyagawa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Alnajjar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yuri</namePart>
        <namePart type="family">Bizzoni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The advent of Large Language Models (LLMs) substantially raised the quality and lowered the cost of Machine Translation (MT). Can scholars working with ancient languages draw benefits from this new technology? More specifically, can current MT facilitate multilingual digital papyrology? To answer this question, we evaluate 9 LLMs in the task of MT with 4 Coptic and 4 Ancient Greek ostraca into English using 6 NLP metrics. We argue that some models have already reached a performance apt to assist human experts. As can be expected from the difference in training corpus size, all models seem to perform better with Ancient Greek than with Coptic, where hallucinations are markedly more common. In the Coptic texts, the specialised Coptic Translator (CT) competes closely with Claude 3 Opus for the rank of most promising tool, while Claude 3 Opus and GPT-4o compete for the same position in the Ancient Greek texts. We argue that MT now substantially heightens the incentive to work on multilingual corpora. This could have a positive and long-lasting effect on Classics and Egyptology and help reduce the historical bias in translation availability. In closing, we reflect upon the need to meet AI-generated translations with an adequate critical stance.</abstract>
    <identifier type="citekey">wannaz-miyagawa-2024-assessing</identifier>
    <location>
      <url>https://aclanthology.org/2024.nlp4dh-1.44</url>
    </location>
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>463</start>
        <end>471</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Assessing Large Language Models in Translating Coptic and Ancient Greek Ostraca
%A Wannaz, Audric-Charles
%A Miyagawa, So
%Y Hämäläinen, Mika
%Y Öhman, Emily
%Y Miyagawa, So
%Y Alnajjar, Khalid
%Y Bizzoni, Yuri
%S Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, USA
%F wannaz-miyagawa-2024-assessing
%X The advent of Large Language Models (LLMs) substantially raised the quality and lowered the cost of Machine Translation (MT). Can scholars working with ancient languages draw benefits from this new technology? More specifically, can current MT facilitate multilingual digital papyrology? To answer this question, we evaluate 9 LLMs in the task of MT with 4 Coptic and 4 Ancient Greek ostraca into English using 6 NLP metrics. We argue that some models have already reached a performance apt to assist human experts. As can be expected from the difference in training corpus size, all models seem to perform better with Ancient Greek than with Coptic, where hallucinations are markedly more common. In the Coptic texts, the specialised Coptic Translator (CT) competes closely with Claude 3 Opus for the rank of most promising tool, while Claude 3 Opus and GPT-4o compete for the same position in the Ancient Greek texts. We argue that MT now substantially heightens the incentive to work on multilingual corpora. This could have a positive and long-lasting effect on Classics and Egyptology and help reduce the historical bias in translation availability. In closing, we reflect upon the need to meet AI-generated translations with an adequate critical stance.
%U https://aclanthology.org/2024.nlp4dh-1.44
%P 463-471
Markdown (Informal)
[Assessing Large Language Models in Translating Coptic and Ancient Greek Ostraca](https://aclanthology.org/2024.nlp4dh-1.44) (Wannaz & Miyagawa, NLP4DH 2024)
ACL
Audric-Charles Wannaz and So Miyagawa. 2024. Assessing Large Language Models in Translating Coptic and Ancient Greek Ostraca. In Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities, pages 463–471, Miami, USA. Association for Computational Linguistics.