% Conference paper record. Language-pair names in the abstract consistently
% use an en dash, and all field values are brace-delimited.
@inproceedings{manakhimova-etal-2024-investigating,
    title     = {Investigating the Linguistic Performance of Large Language Models in Machine Translation},
    author    = {Manakhimova, Shushen and
                 Macketanz, Vivien and
                 Avramidis, Eleftherios and
                 Lapshinova-Koltunski, Ekaterina and
                 Bagdasarov, Sergei and
                 M{\"o}ller, Sebastian},
    editor    = {Haddow, Barry and
                 Kocmi, Tom and
                 Koehn, Philipp and
                 Monz, Christof},
    booktitle = {Proceedings of the Ninth Conference on Machine Translation},
    month     = nov,
    year      = {2024},
    address   = {Miami, Florida, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.wmt-1.28},
    pages     = {355--371},
    abstract  = {This paper summarizes the results of our test suite evaluation on 39 machine translation systems submitted at the Shared Task of the Ninth Conference of Machine Translation (WMT24). It offers a fine-grained linguistic evaluation of machine translation outputs for English{--}German and English{--}Russian, resulting from significant manual linguistic effort. Based on our results, LLMs are inferior to NMT in English{--}German, both in overall scores and when translating specific linguistic phenomena, such as punctuation, complex future verb tenses, and stripping. LLMs show quite a competitive performance in English{--}Russian, although top-performing systems might struggle with some cases of named entities and terminology, function words, mediopassive voice, and semantic roles. Additionally, some LLMs generate very verbose or empty outputs, posing challenges to the evaluation process.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for one conference paper: title, six authors, host
     proceedings (with four editors and publisher), abstract, URL and pages. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="manakhimova-etal-2024-investigating">
    <titleInfo>
      <title>Investigating the Linguistic Performance of Large Language Models in Machine Translation</title>
    </titleInfo>
    <!-- Authors, in citation order -->
    <name type="personal">
      <namePart type="given">Shushen</namePart>
      <namePart type="family">Manakhimova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vivien</namePart>
      <namePart type="family">Macketanz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eleftherios</namePart>
      <namePart type="family">Avramidis</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ekaterina</namePart>
      <namePart type="family">Lapshinova-Koltunski</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sergei</namePart>
      <namePart type="family">Bagdasarov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sebastian</namePart>
      <namePart type="family">Möller</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Ninth Conference on Machine Translation</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Barry</namePart>
        <namePart type="family">Haddow</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tom</namePart>
        <namePart type="family">Kocmi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Philipp</namePart>
        <namePart type="family">Koehn</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christof</namePart>
        <namePart type="family">Monz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, Florida, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <!-- Language pairs are written with an en dash throughout the abstract -->
    <abstract>This paper summarizes the results of our test suite evaluation on 39 machine translation systems submitted at the Shared Task of the Ninth Conference of Machine Translation (WMT24). It offers a fine-grained linguistic evaluation of machine translation outputs for English–German and English–Russian, resulting from significant manual linguistic effort. Based on our results, LLMs are inferior to NMT in English–German, both in overall scores and when translating specific linguistic phenomena, such as punctuation, complex future verb tenses, and stripping. LLMs show quite a competitive performance in English–Russian, although top-performing systems might struggle with some cases of named entities and terminology, function words, mediopassive voice, and semantic roles. Additionally, some LLMs generate very verbose or empty outputs, posing challenges to the evaluation process.</abstract>
    <identifier type="citekey">manakhimova-etal-2024-investigating</identifier>
    <location>
      <url>https://aclanthology.org/2024.wmt-1.28</url>
    </location>
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>355</start>
        <end>371</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Investigating the Linguistic Performance of Large Language Models in Machine Translation
%A Manakhimova, Shushen
%A Macketanz, Vivien
%A Avramidis, Eleftherios
%A Lapshinova-Koltunski, Ekaterina
%A Bagdasarov, Sergei
%A Möller, Sebastian
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Koehn, Philipp
%Y Monz, Christof
%S Proceedings of the Ninth Conference on Machine Translation
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F manakhimova-etal-2024-investigating
%X This paper summarizes the results of our test suite evaluation on 39 machine translation systems submitted at the Shared Task of the Ninth Conference of Machine Translation (WMT24). It offers a fine-grained linguistic evaluation of machine translation outputs for English–German and English–Russian, resulting from significant manual linguistic effort. Based on our results, LLMs are inferior to NMT in English–German, both in overall scores and when translating specific linguistic phenomena, such as punctuation, complex future verb tenses, and stripping. LLMs show quite a competitive performance in English–Russian, although top-performing systems might struggle with some cases of named entities and terminology, function words, mediopassive voice, and semantic roles. Additionally, some LLMs generate very verbose or empty outputs, posing challenges to the evaluation process.
%U https://aclanthology.org/2024.wmt-1.28
%P 355-371
Markdown (Informal)
[Investigating the Linguistic Performance of Large Language Models in Machine Translation](https://aclanthology.org/2024.wmt-1.28) (Manakhimova et al., WMT 2024)
ACL