% Conference paper from the UNLP 2026 proceedings (ACL Anthology record 2026.unlp-1.12).
% Proper nouns and acronyms in title/booktitle are brace-protected as whole words
% so that sentence-casing bibliography styles keep their capitalisation.
@inproceedings{galeshchuk-etal-2026-toward,
  author    = {Galeshchuk, Svitlana and
               Maksymiuk, Yuliia and
               Chernobrov, Yuliia and
               Stankevych, Nina and
               Antoniv, Oleksandra and
               Faryna, Nataliia and
               Popkova, Oksana},
  title     = {Toward a Gold-Standard Benchmark for Evaluating {Ukrainian} Language Proficiency in {LLMs}},
  editor    = {Romanyshyn, Mariana},
  booktitle = {Proceedings of the Fifth {Ukrainian} Natural Language Processing Conference ({UNLP} 2026)},
  month     = may,
  year      = {2026},
  address   = {Lviv, Ukraine},
  publisher = {Association for Computational Linguistics},
  pages     = {121--135},
  isbn      = {979-8-89176-359-3},
  url       = {https://aclanthology.org/2026.unlp-1.12/},
  abstract  = {The paper presents an expert-curated benchmark for assessing Ukrainian proficiency in LLMs, focusing on grammar and orthography as core components of language competence. Prepared by professional linguists, the proposed gold-standard dataset is designed to test normative Ukrainian usage. The benchmark is further used to evaluate a range of LLMs, including Ukrainian-focused, multilingual, and large-scale models, under zero-shot and few-shot prompting in Ukrainian and English. Across these settings, smaller models achieve no more than 42.1{\%} accuracy, while large-scale LLMs reach up to 59.6{\%}. These results show that standard Ukrainian remains challenging for current LLMs and highlight the need for stronger language-specific evaluation and adaptation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="galeshchuk-etal-2026-toward">
<titleInfo>
<title>Toward a Gold-Standard Benchmark for Evaluating Ukrainian Language Proficiency in LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Svitlana</namePart>
<namePart type="family">Galeshchuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuliia</namePart>
<namePart type="family">Maksymiuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuliia</namePart>
<namePart type="family">Chernobrov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nina</namePart>
<namePart type="family">Stankevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oleksandra</namePart>
<namePart type="family">Antoniv</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nataliia</namePart>
<namePart type="family">Faryna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oksana</namePart>
<namePart type="family">Popkova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Ukrainian Natural Language Processing Conference (UNLP 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mariana</namePart>
<namePart type="family">Romanyshyn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Lviv, Ukraine</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-359-3</identifier>
</relatedItem>
<abstract>The paper presents an expert-curated benchmark for assessing Ukrainian proficiency in LLMs, focusing on grammar and orthography as core components of language competence. Prepared by professional linguists, the proposed gold-standard dataset is designed to test normative Ukrainian usage. The benchmark is further used to evaluate a range of LLMs, including Ukrainian-focused, multilingual, and large-scale models, under zero-shot and few-shot prompting in Ukrainian and English. Across these settings, smaller models achieve no more than 42.1% accuracy, while large-scale LLMs reach up to 59.6%. These results show that standard Ukrainian remains challenging for current LLMs and highlight the need for stronger language-specific evaluation and adaptation.</abstract>
<identifier type="citekey">galeshchuk-etal-2026-toward</identifier>
<location>
<url>https://aclanthology.org/2026.unlp-1.12/</url>
</location>
<part>
<date>2026-05</date>
<extent unit="page">
<start>121</start>
<end>135</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Toward a Gold-Standard Benchmark for Evaluating Ukrainian Language Proficiency in LLMs
%A Galeshchuk, Svitlana
%A Maksymiuk, Yuliia
%A Chernobrov, Yuliia
%A Stankevych, Nina
%A Antoniv, Oleksandra
%A Faryna, Nataliia
%A Popkova, Oksana
%Y Romanyshyn, Mariana
%S Proceedings of the Fifth Ukrainian Natural Language Processing Conference (UNLP 2026)
%D 2026
%8 May
%I Association for Computational Linguistics
%C Lviv, Ukraine
%@ 979-8-89176-359-3
%F galeshchuk-etal-2026-toward
%X The paper presents an expert-curated benchmark for assessing Ukrainian proficiency in LLMs, focusing on grammar and orthography as core components of language competence. Prepared by professional linguists, the proposed gold-standard dataset is designed to test normative Ukrainian usage. The benchmark is further used to evaluate a range of LLMs, including Ukrainian-focused, multilingual, and large-scale models, under zero-shot and few-shot prompting in Ukrainian and English. Across these settings, smaller models achieve no more than 42.1% accuracy, while large-scale LLMs reach up to 59.6%. These results show that standard Ukrainian remains challenging for current LLMs and highlight the need for stronger language-specific evaluation and adaptation.
%U https://aclanthology.org/2026.unlp-1.12/
%P 121-135
Markdown (Informal)
[Toward a Gold-Standard Benchmark for Evaluating Ukrainian Language Proficiency in LLMs](https://aclanthology.org/2026.unlp-1.12/) (Galeshchuk et al., UNLP 2026)
ACL