@inproceedings{donthi-etal-2025-improving,
title = "Improving {LLM} Abilities in Idiomatic Translation",
author = "Donthi, Sundesh and
Spencer, Maximilian and
Patel, Om B. and
Doh, Joon Young and
Rodan, Eid and
Zhu, Kevin and
O{'}Brien, Sean",
editor = "Hettiarachchi, Hansi and
Ranasinghe, Tharindu and
Rayson, Paul and
Mitkov, Ruslan and
Gaber, Mohamed and
Premasiri, Damith and
Tan, Fiona Anting and
Uyangodage, Lasitha",
booktitle = "Proceedings of the First Workshop on Language Models for Low-Resource Languages",
month = jan,
year = "2025",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.loreslm-1.13/",
pages = "175--181",
abstract = "Translating idiomatic expressions remains a challenge for large language models (LLMs), as they often produce literal, semantically incorrect translations{---}for instance, directly converting {\textquotedblleft}break a leg{\textquotedblright} into a nonsensical phrase in the target language. While external resources like IdiomKB can supply the figurative meaning and thus yield semantically accurate translations, this approach does not preserve the cultural and stylistic nuances that make idioms so distinctive. Our study focuses on idiomatic translations across multiple languages, including Chinese (ZH), Urdu (UR), and Hindi (HI), with clearly defined abbreviations for each. We propose two methods for improving idiomatic translation fidelity: a Semantic Idiom Alignment (SIA) approach that uses pre-trained sentence embeddings to identify target-language idioms, and a Language-Model-based Idiom Alignment (LIA) approach that prompts an LLM to suggest appropriate idiom counterparts. Human evaluations across multiple language pairs show that SIA better preserves idiomatic style. To support this work, we introduce idiom datasets in low-resource languages (Urdu and Hindi). Our results indicate that aligning idioms at the semantic level can improve cross-lingual style preservation and cultural authenticity."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="donthi-etal-2025-improving">
<titleInfo>
<title>Improving LLM Abilities in Idiomatic Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sundesh</namePart>
<namePart type="family">Donthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maximilian</namePart>
<namePart type="family">Spencer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Om</namePart>
<namePart type="given">B</namePart>
<namePart type="family">Patel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joon</namePart>
<namePart type="given">Young</namePart>
<namePart type="family">Doh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eid</namePart>
<namePart type="family">Rodan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sean</namePart>
<namePart type="family">O’Brien</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Language Models for Low-Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hansi</namePart>
<namePart type="family">Hettiarachchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tharindu</namePart>
<namePart type="family">Ranasinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Rayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Gaber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Damith</namePart>
<namePart type="family">Premasiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fiona</namePart>
<namePart type="given">Anting</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lasitha</namePart>
<namePart type="family">Uyangodage</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Translating idiomatic expressions remains a challenge for large language models (LLMs), as they often produce literal, semantically incorrect translations—for instance, directly converting “break a leg” into a nonsensical phrase in the target language. While external resources like IdiomKB can supply the figurative meaning and thus yield semantically accurate translations, this approach does not preserve the cultural and stylistic nuances that make idioms so distinctive. Our study focuses on idiomatic translations across multiple languages, including Chinese (ZH), Urdu (UR), and Hindi (HI), with clearly defined abbreviations for each. We propose two methods for improving idiomatic translation fidelity: a Semantic Idiom Alignment (SIA) approach that uses pre-trained sentence embeddings to identify target-language idioms, and a Language-Model-based Idiom Alignment (LIA) approach that prompts an LLM to suggest appropriate idiom counterparts. Human evaluations across multiple language pairs show that SIA better preserves idiomatic style. To support this work, we introduce idiom datasets in low-resource languages (Urdu and Hindi). Our results indicate that aligning idioms at the semantic level can improve cross-lingual style preservation and cultural authenticity.</abstract>
<identifier type="citekey">donthi-etal-2025-improving</identifier>
<location>
<url>https://aclanthology.org/2025.loreslm-1.13/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>175</start>
<end>181</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving LLM Abilities in Idiomatic Translation
%A Donthi, Sundesh
%A Spencer, Maximilian
%A Patel, Om B.
%A Doh, Joon Young
%A Rodan, Eid
%A Zhu, Kevin
%A O’Brien, Sean
%Y Hettiarachchi, Hansi
%Y Ranasinghe, Tharindu
%Y Rayson, Paul
%Y Mitkov, Ruslan
%Y Gaber, Mohamed
%Y Premasiri, Damith
%Y Tan, Fiona Anting
%Y Uyangodage, Lasitha
%S Proceedings of the First Workshop on Language Models for Low-Resource Languages
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F donthi-etal-2025-improving
%X Translating idiomatic expressions remains a challenge for large language models (LLMs), as they often produce literal, semantically incorrect translations—for instance, directly converting “break a leg” into a nonsensical phrase in the target language. While external resources like IdiomKB can supply the figurative meaning and thus yield semantically accurate translations, this approach does not preserve the cultural and stylistic nuances that make idioms so distinctive. Our study focuses on idiomatic translations across multiple languages, including Chinese (ZH), Urdu (UR), and Hindi (HI), with clearly defined abbreviations for each. We propose two methods for improving idiomatic translation fidelity: a Semantic Idiom Alignment (SIA) approach that uses pre-trained sentence embeddings to identify target-language idioms, and a Language-Model-based Idiom Alignment (LIA) approach that prompts an LLM to suggest appropriate idiom counterparts. Human evaluations across multiple language pairs show that SIA better preserves idiomatic style. To support this work, we introduce idiom datasets in low-resource languages (Urdu and Hindi). Our results indicate that aligning idioms at the semantic level can improve cross-lingual style preservation and cultural authenticity.
%U https://aclanthology.org/2025.loreslm-1.13/
%P 175-181
Markdown (Informal)
[Improving LLM Abilities in Idiomatic Translation](https://aclanthology.org/2025.loreslm-1.13/) (Donthi et al., LoResLM 2025)
ACL
- Sundesh Donthi, Maximilian Spencer, Om B. Patel, Joon Young Doh, Eid Rodan, Kevin Zhu, and Sean O’Brien. 2025. Improving LLM Abilities in Idiomatic Translation. In Proceedings of the First Workshop on Language Models for Low-Resource Languages, pages 175–181, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.