% BibTeX record for the BIRDTurk paper (ACL Anthology ID 2026.sigturk-1.13).
% Case-sensitive words are brace-protected as whole words so that styles which
% sentence-case titles keep them intact.
@inproceedings{aktas-etal-2026-birdturk,
  title     = {{BIRDTurk}: Adaptation of the {BIRD} Text-to-{SQL} Dataset to {Turkish}},
  author    = {Akta{\c{s}}, Burak and
               Baytekin, Mehmet Can and
               K{\"o}se, S{\"u}ha Ka{\u{g}}an and
               {\.I}lbilgi, {\"O}mer and
               Y{\i}lmaz, Elif {\"O}zge and
               Toraman, Cagri and
               G{\"o}r{\"u}r, Bilge Kaan},
  editor    = {Oflazer, Kemal and
               K{\"o}ksal, Abdullatif and
               Varol, Onur},
  booktitle = {Proceedings of the Second Workshop Natural Language Processing for {Turkic} Languages ({SIGTURK} 2026)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.sigturk-1.13/},
  pages     = {155--171},
  isbn      = {979-8-89176-370-8},
  abstract  = {Text-to-SQL systems have achieved strong performance on English benchmarks, yet their behavior in morphologically rich, low-resource languages remains largely unexplored. We introduce BIRDTurk, the first Turkish adaptation of the BIRD benchmark, constructed through a controlled translation pipeline that adapts schema identifiers to Turkish while strictly preserving the logical structure and execution semantics of SQL queries and databases. Translation quality is validated on a sample size determined by the Central Limit Theorem to ensure 95{\%} confidence, achieving 98.15{\%} accuracy on human-evaluated samples. Using BIRDTurk, we evaluate inference-based prompting, agentic multi-stage reasoning, and supervised fine-tuning. Our results reveal that Turkish introduces consistent performance degradation{--}driven by both structural linguistic divergence and underrepresentation in LLM pretraining{--}while agentic reasoning demonstrates stronger cross-lingual robustness. Supervised fine-tuning remains challenging for standard multilingual baselines but scales effectively with modern instruction-tuned models. BIRDTurk provides a controlled testbed for cross-lingual Text-to-SQL evaluation under realistic database conditions. We release the training and development splits to support future research.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper; the ID matches the citekey identifier below. -->
  <mods ID="aktas-etal-2026-birdturk">
    <titleInfo>
      <title>BIRDTurk: Adaptation of the BIRD Text-to-SQL Dataset to Turkish</title>
    </titleInfo>

    <!-- Authors: one <name> per person, given-name parts listed separately. -->
    <name type="personal">
      <namePart type="given">Burak</namePart>
      <namePart type="family">Aktaş</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mehmet</namePart>
      <namePart type="given">Can</namePart>
      <namePart type="family">Baytekin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Süha</namePart>
      <namePart type="given">Kağan</namePart>
      <namePart type="family">Köse</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ömer</namePart>
      <namePart type="family">İlbilgi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Elif</namePart>
      <namePart type="given">Özge</namePart>
      <namePart type="family">Yılmaz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Cagri</namePart>
      <namePart type="family">Toraman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bilge</namePart>
      <namePart type="given">Kaan</namePart>
      <namePart type="family">Görür</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop Natural Language Processing for Turkic Languages (SIGTURK 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Kemal</namePart>
        <namePart type="family">Oflazer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Abdullatif</namePart>
        <namePart type="family">Köksal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Onur</namePart>
        <namePart type="family">Varol</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-370-8</identifier>
    </relatedItem>

    <abstract>Text-to-SQL systems have achieved strong performance on English benchmarks, yet their behavior in morphologically rich, low-resource languages remains largely unexplored. We introduce BIRDTurk, the first Turkish adaptation of the BIRD benchmark, constructed through a controlled translation pipeline that adapts schema identifiers to Turkish while strictly preserving the logical structure and execution semantics of SQL queries and databases. Translation quality is validated on a sample size determined by the Central Limit Theorem to ensure 95% confidence, achieving 98.15% accuracy on human-evaluated samples. Using BIRDTurk, we evaluate inference-based prompting, agentic multi-stage reasoning, and supervised fine-tuning. Our results reveal that Turkish introduces consistent performance degradation–driven by both structural linguistic divergence and underrepresentation in LLM pretraining–while agentic reasoning demonstrates stronger cross-lingual robustness. Supervised fine-tuning remains challenging for standard multilingual baselines but scales effectively with modern instruction-tuned models. BIRDTurk provides a controlled testbed for cross-lingual Text-to-SQL evaluation under realistic database conditions. We release the training and development splits to support future research.</abstract>
    <identifier type="citekey">aktas-etal-2026-birdturk</identifier>
    <location>
      <url>https://aclanthology.org/2026.sigturk-1.13/</url>
    </location>

    <!-- Position of the paper inside the host volume. -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>155</start>
        <end>171</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T BIRDTurk: Adaptation of the BIRD Text-to-SQL Dataset to Turkish
%A Aktaş, Burak
%A Baytekin, Mehmet Can
%A Köse, Süha Kağan
%A İlbilgi, Ömer
%A Yılmaz, Elif Özge
%A Toraman, Cagri
%A Görür, Bilge Kaan
%Y Oflazer, Kemal
%Y Köksal, Abdullatif
%Y Varol, Onur
%S Proceedings of the Second Workshop Natural Language Processing for Turkic Languages (SIGTURK 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-370-8
%F aktas-etal-2026-birdturk
%X Text-to-SQL systems have achieved strong performance on English benchmarks, yet their behavior in morphologically rich, low-resource languages remains largely unexplored. We introduce BIRDTurk, the first Turkish adaptation of the BIRD benchmark, constructed through a controlled translation pipeline that adapts schema identifiers to Turkish while strictly preserving the logical structure and execution semantics of SQL queries and databases. Translation quality is validated on a sample size determined by the Central Limit Theorem to ensure 95% confidence, achieving 98.15% accuracy on human-evaluated samples. Using BIRDTurk, we evaluate inference-based prompting, agentic multi-stage reasoning, and supervised fine-tuning. Our results reveal that Turkish introduces consistent performance degradation–driven by both structural linguistic divergence and underrepresentation in LLM pretraining–while agentic reasoning demonstrates stronger cross-lingual robustness. Supervised fine-tuning remains challenging for standard multilingual baselines but scales effectively with modern instruction-tuned models. BIRDTurk provides a controlled testbed for cross-lingual Text-to-SQL evaluation under realistic database conditions. We release the training and development splits to support future research.
%U https://aclanthology.org/2026.sigturk-1.13/
%P 155-171
Markdown (Informal)
[BIRDTurk: Adaptation of the BIRD Text-to-SQL Dataset to Turkish](https://aclanthology.org/2026.sigturk-1.13/) (Aktaş et al., SIGTURK 2026)
ACL
- Burak Aktaş, Mehmet Can Baytekin, Süha Kağan Köse, Ömer İlbilgi, Elif Özge Yılmaz, Cagri Toraman, and Bilge Kaan Görür. 2026. BIRDTurk: Adaptation of the BIRD Text-to-SQL Dataset to Turkish. In Proceedings of the Second Workshop Natural Language Processing for Turkic Languages (SIGTURK 2026), pages 155–171, Rabat, Morocco. Association for Computational Linguistics.