@inproceedings{fernandes-lopes-2024-open,
title = "Open-source {LLM}s vs. {NMT} Systems: Translating Spatial Language in {EN}-{PT}-br Subtitles",
author = "Fernandes, Rafael and
Lopes, Marcos",
editor = "Martindale, Marianna and
Campbell, Janice and
Savenkov, Konstantin and
Goel, Shivali",
booktitle = "Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 2: Presentations)",
month = sep,
year = "2024",
address = "Chicago, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2024.amta-presentations.11",
pages = "152--153",
abstract = "This research investigates the challenges of translating spatial language using open-source LLMs versus traditional NMTs. Focusing on spatial prepositions like ACROSS, INTO, ONTO, and THROUGH, which are particularly challenging for the EN-PT-br pair, the study evaluates translations using BLEU, METEOR, BERTScore, COMET, and TER metrics, along with manual error analysis. The findings reveal that moderate-sized LLMs, such as LLaMa-3-8B and Mixtral-8x7B, achieve accuracy comparable to NMTs like DeepL. However, LLMs frequently exhibit mistranslation errors, including interlanguage/code-switching and anglicisms, while NMTs demonstrate better fluency. Both LLMs and NMTs struggle with spatial-related errors, including syntactic projections and polysemy. The study concludes that significant hurdles remain in accurately translating spatial language, suggesting that future research should focus on enhancing training datasets, refining models, and developing more sophisticated evaluation metrics.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fernandes-lopes-2024-open">
<titleInfo>
<title>Open-source LLMs vs. NMT Systems: Translating Spatial Language in EN-PT-br Subtitles</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rafael</namePart>
<namePart type="family">Fernandes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Lopes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 2: Presentations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Martindale</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Janice</namePart>
<namePart type="family">Campbell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Konstantin</namePart>
<namePart type="family">Savenkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shivali</namePart>
<namePart type="family">Goel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Chicago, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This research investigates the challenges of translating spatial language using open-source LLMs versus traditional NMTs. Focusing on spatial prepositions like ACROSS, INTO, ONTO, and THROUGH, which are particularly challenging for the EN-PT-br pair, the study evaluates translations using BLEU, METEOR, BERTScore, COMET, and TER metrics, along with manual error analysis. The findings reveal that moderate-sized LLMs, such as LLaMa-3-8B and Mixtral-8x7B, achieve accuracy comparable to NMTs like DeepL. However, LLMs frequently exhibit mistranslation errors, including interlanguage/code-switching and anglicisms, while NMTs demonstrate better fluency. Both LLMs and NMTs struggle with spatial-related errors, including syntactic projections and polysemy. The study concludes that significant hurdles remain in accurately translating spatial language, suggesting that future research should focus on enhancing training datasets, refining models, and developing more sophisticated evaluation metrics.</abstract>
<identifier type="citekey">fernandes-lopes-2024-open</identifier>
<location>
<url>https://aclanthology.org/2024.amta-presentations.11</url>
</location>
<part>
<date>2024-09</date>
<extent unit="page">
<start>152</start>
<end>153</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Open-source LLMs vs. NMT Systems: Translating Spatial Language in EN-PT-br Subtitles
%A Fernandes, Rafael
%A Lopes, Marcos
%Y Martindale, Marianna
%Y Campbell, Janice
%Y Savenkov, Konstantin
%Y Goel, Shivali
%S Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 2: Presentations)
%D 2024
%8 September
%I Association for Machine Translation in the Americas
%C Chicago, USA
%F fernandes-lopes-2024-open
%X This research investigates the challenges of translating spatial language using open-source LLMs versus traditional NMTs. Focusing on spatial prepositions like ACROSS, INTO, ONTO, and THROUGH, which are particularly challenging for the EN-PT-br pair, the study evaluates translations using BLEU, METEOR, BERTScore, COMET, and TER metrics, along with manual error analysis. The findings reveal that moderate-sized LLMs, such as LLaMa-3-8B and Mixtral-8x7B, achieve accuracy comparable to NMTs like DeepL. However, LLMs frequently exhibit mistranslation errors, including interlanguage/code-switching and anglicisms, while NMTs demonstrate better fluency. Both LLMs and NMTs struggle with spatial-related errors, including syntactic projections and polysemy. The study concludes that significant hurdles remain in accurately translating spatial language, suggesting that future research should focus on enhancing training datasets, refining models, and developing more sophisticated evaluation metrics.
%U https://aclanthology.org/2024.amta-presentations.11
%P 152-153
Markdown (Informal)
[Open-source LLMs vs. NMT Systems: Translating Spatial Language in EN-PT-br Subtitles](https://aclanthology.org/2024.amta-presentations.11) (Fernandes & Lopes, AMTA 2024)
ACL