@inproceedings{riina-etal-2024-evaluation,
title = "An Evaluation of {E}nglish to {S}panish Medical Translation by Large Language Models",
author = "Riina, Nicholas and
Patlolla, Likhitha and
Hernandez Joya, Camilo and
Bautista, Roger and
Olivar-Villanueva, Melissa and
Kumar, Anish",
editor = "Martindale, Marianna and
Campbell, Janice and
Savenkov, Konstantin and
Goel, Shivali",
booktitle = "Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 2: Presentations)",
month = sep,
year = "2024",
address = "Chicago, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2024.amta-presentations.15",
pages = "222--236",
abstract = "Machine translation (MT) with Large Language Models (LLMs) holds promise as a clinical translation tool with more capabilities than a traditional MT model. This work compares the quality of English to Spanish translation by three LLMs: ChatGPT3.5 Turbo, ChatGPT4o, and Aguila, against Google Translate. The test set used in this study is MedlinePlus, a parallel dataset of educational health information in English and Spanish developed by the National Library of Medicine. ChatGPT4o and Google Translate performed similarly in both automated scoring (BLEU, METEOR, and BERTscore) and human evaluation with ChatGPT3.5 Turbo not far behind. Aguila, the only LLM intended for primarily Spanish and Catalan use, surprisingly performed much worse than the other models. However, qualitative analysis of Aguila{'}s results revealed the use of Spanish word choice that may reach a broader audience.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="riina-etal-2024-evaluation">
<titleInfo>
<title>An Evaluation of English to Spanish Medical Translation by Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="family">Riina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Likhitha</namePart>
<namePart type="family">Patlolla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Camilo</namePart>
<namePart type="family">Hernandez Joya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roger</namePart>
<namePart type="family">Bautista</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Melissa</namePart>
<namePart type="family">Olivar-Villanueva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anish</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 2: Presentations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Martindale</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Janice</namePart>
<namePart type="family">Campbell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Konstantin</namePart>
<namePart type="family">Savenkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shivali</namePart>
<namePart type="family">Goel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Chicago, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Machine translation (MT) with Large Language Models (LLMs) holds promise as a clinical translation tool with more capabilities than a traditional MT model. This work compares the quality of English to Spanish translation by three LLMs: ChatGPT3.5 Turbo, ChatGPT4o, and Aguila, against Google Translate. The test set used in this study is MedlinePlus, a parallel dataset of educational health information in English and Spanish developed by the National Library of Medicine. ChatGPT4o and Google Translate performed similarly in both automated scoring (BLEU, METEOR, and BERTscore) and human evaluation with ChatGPT3.5 Turbo not far behind. Aguila, the only LLM intended for primarily Spanish and Catalan use, surprisingly performed much worse than the other models. However, qualitative analysis of Aguila’s results revealed the use of Spanish word choice that may reach a broader audience.</abstract>
<identifier type="citekey">riina-etal-2024-evaluation</identifier>
<location>
<url>https://aclanthology.org/2024.amta-presentations.15</url>
</location>
<part>
<date>2024-09</date>
<extent unit="page">
<start>222</start>
<end>236</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Evaluation of English to Spanish Medical Translation by Large Language Models
%A Riina, Nicholas
%A Patlolla, Likhitha
%A Hernandez Joya, Camilo
%A Bautista, Roger
%A Olivar-Villanueva, Melissa
%A Kumar, Anish
%Y Martindale, Marianna
%Y Campbell, Janice
%Y Savenkov, Konstantin
%Y Goel, Shivali
%S Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 2: Presentations)
%D 2024
%8 September
%I Association for Machine Translation in the Americas
%C Chicago, USA
%F riina-etal-2024-evaluation
%X Machine translation (MT) with Large Language Models (LLMs) holds promise as a clinical translation tool with more capabilities than a traditional MT model. This work compares the quality of English to Spanish translation by three LLMs: ChatGPT3.5 Turbo, ChatGPT4o, and Aguila, against Google Translate. The test set used in this study is MedlinePlus, a parallel dataset of educational health information in English and Spanish developed by the National Library of Medicine. ChatGPT4o and Google Translate performed similarly in both automated scoring (BLEU, METEOR, and BERTscore) and human evaluation with ChatGPT3.5 Turbo not far behind. Aguila, the only LLM intended for primarily Spanish and Catalan use, surprisingly performed much worse than the other models. However, qualitative analysis of Aguila’s results revealed the use of Spanish word choice that may reach a broader audience.
%U https://aclanthology.org/2024.amta-presentations.15
%P 222-236
Markdown (Informal)
[An Evaluation of English to Spanish Medical Translation by Large Language Models](https://aclanthology.org/2024.amta-presentations.15) (Riina et al., AMTA 2024)
ACL
- Nicholas Riina, Likhitha Patlolla, Camilo Hernandez Joya, Roger Bautista, Melissa Olivar-Villanueva, and Anish Kumar. 2024. An Evaluation of English to Spanish Medical Translation by Large Language Models. In Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 2: Presentations), pages 222–236, Chicago, USA. Association for Machine Translation in the Americas.