% SimIdioms conference paper (UNLP 2026); this record mirrors the page given in the url field.
% The abstract's translation count is written as plain "65,723": the earlier escaped-brace
% form typeset literal braces around the comma.
@inproceedings{petruniv-etal-2026-simidioms,
    title = "{S}im{I}dioms: A Corpus and Benchmark for {U}krainian Idiom Translation",
    author = "Petruniv, Yaryna and
      Makogon, Iuliia and
      Kyslyi, Roman",
    editor = "Romanyshyn, Mariana",
    booktitle = "Proceedings of the Fifth {U}krainian Natural Language Processing Conference ({UNLP} 2026)",
    month = may,
    year = "2026",
    address = "Lviv, Ukraine",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.unlp-1.5/",
    pages = "41--52",
    ISBN = "979-8-89176-359-3",
    abstract = "We present a corpus of aligned Ukrainian{--}English idiomatic expressions and a comprehensive evaluation of six large language models on the task of translating sentences containing idioms. The corpus is constructed by linking entries across multiple phraseological dictionaries and the MIDAS corpus using vector similarity search, enriched with figurative meanings, contextual sentences from the UberText fiction corpus, and semantic transparency scores. We evaluate Gemini 2.5 Flash, Claude Haiku 4.5, Gemma 3 12B, Qwen3-30B-A3B, LapaLM, and Tiny Aya Global in both Ukrainian-to-English and English-to-Ukrainian directions under default and context-augmented prompting. Our evaluation of 65,723 translations reveals a pronounced direction asymmetry, with all models performing substantially worse when translating into Ukrainian. Providing figurative meaning and target idiom candidates improves quality for most models in Ukrainian-to-English but has limited effect in the reverse direction. We additionally show that semantic transparency of idioms is only weakly correlated with translation quality. We release the corpus and evaluation framework to support research on idiomatic translation for mid-resource languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="petruniv-etal-2026-simidioms">
<titleInfo>
<title>SimIdioms: A Corpus and Benchmark for Ukrainian Idiom Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaryna</namePart>
<namePart type="family">Petruniv</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iuliia</namePart>
<namePart type="family">Makogon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Kyslyi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Ukrainian Natural Language Processing Conference (UNLP 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mariana</namePart>
<namePart type="family">Romanyshyn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Lviv, Ukraine</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-359-3</identifier>
</relatedItem>
<abstract>We present a corpus of aligned Ukrainian–English idiomatic expressions and a comprehensive evaluation of six large language models on the task of translating sentences containing idioms. The corpus is constructed by linking entries across multiple phraseological dictionaries and the MIDAS corpus using vector similarity search, enriched with figurative meanings, contextual sentences from the UberText fiction corpus, and semantic transparency scores. We evaluate Gemini 2.5 Flash, Claude Haiku 4.5, Gemma 3 12B, Qwen3-30B-A3B, LapaLM, and Tiny Aya Global in both Ukrainian-to-English and English-to-Ukrainian directions under default and context-augmented prompting. Our evaluation of 65,723 translations reveals a pronounced direction asymmetry, with all models performing substantially worse when translating into Ukrainian. Providing figurative meaning and target idiom candidates improves quality for most models in Ukrainian-to-English but has limited effect in the reverse direction. We additionally show that semantic transparency of idioms is only weakly correlated with translation quality. We release the corpus and evaluation framework to support research on idiomatic translation for mid-resource languages.</abstract>
<identifier type="citekey">petruniv-etal-2026-simidioms</identifier>
<location>
<url>https://aclanthology.org/2026.unlp-1.5/</url>
</location>
<part>
<date>2026-05</date>
<extent unit="page">
<start>41</start>
<end>52</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SimIdioms: A Corpus and Benchmark for Ukrainian Idiom Translation
%A Petruniv, Yaryna
%A Makogon, Iuliia
%A Kyslyi, Roman
%Y Romanyshyn, Mariana
%S Proceedings of the Fifth Ukrainian Natural Language Processing Conference (UNLP 2026)
%D 2026
%8 May
%I Association for Computational Linguistics
%C Lviv, Ukraine
%@ 979-8-89176-359-3
%F petruniv-etal-2026-simidioms
%X We present a corpus of aligned Ukrainian–English idiomatic expressions and a comprehensive evaluation of six large language models on the task of translating sentences containing idioms. The corpus is constructed by linking entries across multiple phraseological dictionaries and the MIDAS corpus using vector similarity search, enriched with figurative meanings, contextual sentences from the UberText fiction corpus, and semantic transparency scores. We evaluate Gemini 2.5 Flash, Claude Haiku 4.5, Gemma 3 12B, Qwen3-30B-A3B, LapaLM, and Tiny Aya Global in both Ukrainian-to-English and English-to-Ukrainian directions under default and context-augmented prompting. Our evaluation of 65,723 translations reveals a pronounced direction asymmetry, with all models performing substantially worse when translating into Ukrainian. Providing figurative meaning and target idiom candidates improves quality for most models in Ukrainian-to-English but has limited effect in the reverse direction. We additionally show that semantic transparency of idioms is only weakly correlated with translation quality. We release the corpus and evaluation framework to support research on idiomatic translation for mid-resource languages.
%U https://aclanthology.org/2026.unlp-1.5/
%P 41-52
Markdown (Informal)
[SimIdioms: A Corpus and Benchmark for Ukrainian Idiom Translation](https://aclanthology.org/2026.unlp-1.5/) (Petruniv et al., UNLP 2026)
ACL