@inproceedings{gebesce-etal-2026-overview,
title = "Overview of the {SIGTURK} 2026 Shared Task: Terminology-Aware Machine Translation for {E}nglish{--}{T}urkish Scientific Texts",
author = {Gebe{\c{s}}{\c{c}}e, Ali and
Safa, Abdulfattah and
Amasya, Ege U{\u{g}}ur and
{\c{S}}ahin, G{\"o}zde G{\"u}l},
editor = {Oflazer, Kemal and
K{\"o}ksal, Abdullatif and
Varol, Onur},
booktitle = "Proceedings of the Second Workshop Natural Language Processing for {T}urkic Languages ({SIGTURK} 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.sigturk-1.20/",
pages = "236--247",
ISBN = "979-8-89176-370-8",
abstract = "This paper presents an overview of the SIGTURK 2026 Shared Task on Terminology-Aware Machine Translation for English-Turkish Scientific Texts. We address the critical challenge of terminological accuracy in low-resource settings by constructing the first terminology-rich English-Turkish parallel corpus, comprising 3,300 sentence pairs from STEM domains with 10,157 expert-validated term pairs. The shared task consists of three subtasks: term detection, expert-guided correction, and end-to-end post-editing. We evaluate state-of-the-art baselines (including GPT-5.2 and Claude Sonnet 4.5) alongside participant systems employing diverse strategies from fine-tuning to Retrieval-Augmented Generation (RAG). Our results highlight that while massive generalist models dominate zero-shot detection, smaller, domain-adapted models using Supervised Fine-Tuning and Reinforcement Learning can significantly outperform them in end-to-end post-editing. Furthermore, we find that rigid retrieval pipelines often disrupt fluency, whereas Chain-of-Thought prompting allows models to integrate terminology more naturally. Despite these advances, a significant gap remains between automated systems and human expert performance in strict terminology correction."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gebesce-etal-2026-overview">
<titleInfo>
<title>Overview of the SIGTURK 2026 Shared Task: Terminology-Aware Machine Translation for English–Turkish Scientific Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Gebeşçe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdulfattah</namePart>
<namePart type="family">Safa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ege</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Amasya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gözde</namePart>
<namePart type="given">Gül</namePart>
<namePart type="family">Şahin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop Natural Language Processing for Turkic Languages (SIGTURK 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kemal</namePart>
<namePart type="family">Oflazer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdullatif</namePart>
<namePart type="family">Köksal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Onur</namePart>
<namePart type="family">Varol</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-370-8</identifier>
</relatedItem>
<abstract>This paper presents an overview of the SIGTURK 2026 Shared Task on Terminology-Aware Machine Translation for English-Turkish Scientific Texts. We address the critical challenge of terminological accuracy in low-resource settings by constructing the first terminology-rich English-Turkish parallel corpus, comprising 3,300 sentence pairs from STEM domains with 10,157 expert-validated term pairs. The shared task consists of three subtasks: term detection, expert-guided correction, and end-to-end post-editing. We evaluate state-of-the-art baselines (including GPT-5.2 and Claude Sonnet 4.5) alongside participant systems employing diverse strategies from fine-tuning to Retrieval-Augmented Generation (RAG). Our results highlight that while massive generalist models dominate zero-shot detection, smaller, domain-adapted models using Supervised Fine-Tuning and Reinforcement Learning can significantly outperform them in end-to-end post-editing. Furthermore, we find that rigid retrieval pipelines often disrupt fluency, whereas Chain-of-Thought prompting allows models to integrate terminology more naturally. Despite these advances, a significant gap remains between automated systems and human expert performance in strict terminology correction.</abstract>
<identifier type="citekey">gebesce-etal-2026-overview</identifier>
<location>
<url>https://aclanthology.org/2026.sigturk-1.20/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>236</start>
<end>247</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Overview of the SIGTURK 2026 Shared Task: Terminology-Aware Machine Translation for English–Turkish Scientific Texts
%A Gebeşçe, Ali
%A Safa, Abdulfattah
%A Amasya, Ege Uğur
%A Şahin, Gözde Gül
%Y Oflazer, Kemal
%Y Köksal, Abdullatif
%Y Varol, Onur
%S Proceedings of the Second Workshop Natural Language Processing for Turkic Languages (SIGTURK 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-370-8
%F gebesce-etal-2026-overview
%X This paper presents an overview of the SIGTURK 2026 Shared Task on Terminology-Aware Machine Translation for English-Turkish Scientific Texts. We address the critical challenge of terminological accuracy in low-resource settings by constructing the first terminology-rich English-Turkish parallel corpus, comprising 3,300 sentence pairs from STEM domains with 10,157 expert-validated term pairs. The shared task consists of three subtasks: term detection, expert-guided correction, and end-to-end post-editing. We evaluate state-of-the-art baselines (including GPT-5.2 and Claude Sonnet 4.5) alongside participant systems employing diverse strategies from fine-tuning to Retrieval-Augmented Generation (RAG). Our results highlight that while massive generalist models dominate zero-shot detection, smaller, domain-adapted models using Supervised Fine-Tuning and Reinforcement Learning can significantly outperform them in end-to-end post-editing. Furthermore, we find that rigid retrieval pipelines often disrupt fluency, whereas Chain-of-Thought prompting allows models to integrate terminology more naturally. Despite these advances, a significant gap remains between automated systems and human expert performance in strict terminology correction.
%U https://aclanthology.org/2026.sigturk-1.20/
%P 236-247
Markdown (Informal)
[Overview of the SIGTURK 2026 Shared Task: Terminology-Aware Machine Translation for English–Turkish Scientific Texts](https://aclanthology.org/2026.sigturk-1.20/) (Gebeşçe et al., SIGTURK 2026)
ACL