% BibTeX record for ACL Anthology paper 2025.iwclul-1.17.
% Case-sensitive words in the title are braced as whole words so that
% sentence-casing styles keep them intact.
@inproceedings{tereschenko-etal-2025-evaluating,
    title     = {Evaluating {OpenAI} {GPT} Models for Translation of Endangered {Uralic} Languages: A Comparison of Reasoning and Non-Reasoning Architectures},
    author    = {Tereschenko, Yehor and
                 H{\"a}m{\"a}l{\"a}inen, Mika and
                 Myroniuk, Svitlana},
    editor    = {H{\"a}m{\"a}l{\"a}inen, Mika and
                 Rie{\ss}ler, Michael and
                 Morooka, Eiaki V. and
                 Kharlashkin, Lev},
    booktitle = {Proceedings of the 10th International Workshop on Computational Linguistics for Uralic Languages},
    month     = dec,
    year      = {2025},
    address   = {Joensuu, Finland},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.iwclul-1.17/},
    pages     = {131--139},
    isbn      = {979-8-89176-360-9},
    abstract  = {The evaluation of Large Language Models (LLMs) for translation tasks has primarily focused on high-resource languages, leaving a significant gap in understanding their performance on low-resource and endangered languages. This study presents a comprehensive comparison of OpenAI{'}s GPT models, specifically examining the differences between reasoning and non-reasoning architectures for translating between Finnish and four low-resource Uralic languages: Komi-Zyrian, Moksha, Erzya, and Udmurt. Using a parallel corpus of literary texts, we evaluate model willingness to attempt translation through refusal rate analysis across different model architectures. Our findings reveal significant performance variations between reasoning and non-reasoning models, with reasoning models showing 16 percentage points lower refusal rates. The results provide valuable insights for researchers and practitioners working with Uralic languages and contribute to the broader understanding of reasoning model capabilities for endangered language preservation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record: paper-level metadata first, then the host
       proceedings volume (editors, publisher, place, ISBN) in relatedItem. -->
  <mods ID="tereschenko-etal-2025-evaluating">
    <titleInfo>
      <title>Evaluating OpenAI GPT Models for Translation of Endangered Uralic Languages: A Comparison of Reasoning and Non-Reasoning Architectures</title>
    </titleInfo>
    <!-- Authors of the paper, in byline order. -->
    <name type="personal">
      <namePart type="given">Yehor</namePart>
      <namePart type="family">Tereschenko</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mika</namePart>
      <namePart type="family">Hämäläinen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Svitlana</namePart>
      <namePart type="family">Myroniuk</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume in which the paper appears. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th International Workshop on Computational Linguistics for Uralic Languages</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Mika</namePart>
        <namePart type="family">Hämäläinen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Michael</namePart>
        <namePart type="family">Rießler</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eiaki</namePart>
        <namePart type="given">V</namePart>
        <namePart type="family">Morooka</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lev</namePart>
        <namePart type="family">Kharlashkin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Joensuu, Finland</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-360-9</identifier>
    </relatedItem>
    <abstract>The evaluation of Large Language Models (LLMs) for translation tasks has primarily focused on high-resource languages, leaving a significant gap in understanding their performance on low-resource and endangered languages. This study presents a comprehensive comparison of OpenAI’s GPT models, specifically examining the differences between reasoning and non-reasoning architectures for translating between Finnish and four low-resource Uralic languages: Komi-Zyrian, Moksha, Erzya, and Udmurt. Using a parallel corpus of literary texts, we evaluate model willingness to attempt translation through refusal rate analysis across different model architectures. Our findings reveal significant performance variations between reasoning and non-reasoning models, with reasoning models showing 16 percentage points lower refusal rates. The results provide valuable insights for researchers and practitioners working with Uralic languages and contribute to the broader understanding of reasoning model capabilities for endangered language preservation.</abstract>
    <identifier type="citekey">tereschenko-etal-2025-evaluating</identifier>
    <location>
      <url>https://aclanthology.org/2025.iwclul-1.17/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2025-12</date>
      <extent unit="page">
        <start>131</start>
        <end>139</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating OpenAI GPT Models for Translation of Endangered Uralic Languages: A Comparison of Reasoning and Non-Reasoning Architectures
%A Tereschenko, Yehor
%A Hämäläinen, Mika
%A Myroniuk, Svitlana
%Y Hämäläinen, Mika
%Y Rießler, Michael
%Y Morooka, Eiaki V.
%Y Kharlashkin, Lev
%S Proceedings of the 10th International Workshop on Computational Linguistics for Uralic Languages
%D 2025
%8 December
%I Association for Computational Linguistics
%C Joensuu, Finland
%@ 979-8-89176-360-9
%F tereschenko-etal-2025-evaluating
%X The evaluation of Large Language Models (LLMs) for translation tasks has primarily focused on high-resource languages, leaving a significant gap in understanding their performance on low-resource and endangered languages. This study presents a comprehensive comparison of OpenAI’s GPT models, specifically examining the differences between reasoning and non-reasoning architectures for translating between Finnish and four low-resource Uralic languages: Komi-Zyrian, Moksha, Erzya, and Udmurt. Using a parallel corpus of literary texts, we evaluate model willingness to attempt translation through refusal rate analysis across different model architectures. Our findings reveal significant performance variations between reasoning and non-reasoning models, with reasoning models showing 16 percentage points lower refusal rates. The results provide valuable insights for researchers and practitioners working with Uralic languages and contribute to the broader understanding of reasoning model capabilities for endangered language preservation.
%U https://aclanthology.org/2025.iwclul-1.17/
%P 131-139
Markdown (Informal)
[Evaluating OpenAI GPT Models for Translation of Endangered Uralic Languages: A Comparison of Reasoning and Non-Reasoning Architectures](https://aclanthology.org/2025.iwclul-1.17/) (Tereschenko et al., IWCLUL 2025)
ACL