% ACL Anthology BibTeX record for the RAGTurk paper (SIGTURK 2026 proceedings, pages 179--196).
@inproceedings{kose-etal-2026-ragturk,
    title     = {{RAGTurk}: Best Practices for Retrieval Augmented Generation in {Turkish}},
    author    = {K{\"o}se, S{\"u}ha Ka{\u{g}}an and
                 Baytekin, Mehmet Can and
                 Akta{\c{s}}, Burak and
                 G{\"o}r{\"u}r, Bilge Kaan and
                 Munis, Evren Ayberk and
                 Y{\i}lmaz, Deniz and
                 Kartal, Muhammed Yusuf and
                 Toraman, Cagri},
    editor    = {Oflazer, Kemal and
                 K{\"o}ksal, Abdullatif and
                 Varol, Onur},
    booktitle = {Proceedings of the Second Workshop Natural Language Processing for {Turkic} Languages ({SIGTURK} 2026)},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.sigturk-1.15/},
    pages     = {179--196},
    isbn      = {979-8-89176-370-8},
    abstract  = {Retrieval-Augmented Generation (RAG) enhances LLM factuality, yet design guidance remains English-centric, limiting insights for morphologically rich languages like Turkish. We address this by constructing a comprehensive Turkish RAG dataset derived from Turkish Wikipedia and CulturaX, comprising question-answer pairs and relevant passage chunks. We benchmark seven stages of the RAG pipeline{---}from query transformation and reranking to answer refinement{---}without task-specific fine-tuning. Our results show that complex methods like HyDE maximize accuracy (85{\%}) that is considerably higher than the baseline (78.70{\%}). Also a Pareto-optimal configuration using Cross-encoder Reranking and Context Augmentation achieves comparable performance (84.60{\%}) with much lower cost. We further demonstrate that over-stacking generative modules can degrade performance by distorting morphological cues, whereas simple query clarification with robust reranking offers an effective solution.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper identified by citekey kose-etal-2026-ragturk. -->
  <mods ID="kose-etal-2026-ragturk">
    <titleInfo>
      <title>RAGTurk: Best Practices for Retrieval Augmented Generation in Turkish</title>
    </titleInfo>

    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Süha</namePart>
      <namePart type="given">Kağan</namePart>
      <namePart type="family">Köse</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mehmet</namePart>
      <namePart type="given">Can</namePart>
      <namePart type="family">Baytekin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Burak</namePart>
      <namePart type="family">Aktaş</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bilge</namePart>
      <namePart type="given">Kaan</namePart>
      <namePart type="family">Görür</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Evren</namePart>
      <namePart type="given">Ayberk</namePart>
      <namePart type="family">Munis</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Deniz</namePart>
      <namePart type="family">Yılmaz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Muhammed</namePart>
      <namePart type="given">Yusuf</namePart>
      <namePart type="family">Kartal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Cagri</namePart>
      <namePart type="family">Toraman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop Natural Language Processing for Turkic Languages (SIGTURK 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Kemal</namePart>
        <namePart type="family">Oflazer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Abdullatif</namePart>
        <namePart type="family">Köksal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Onur</namePart>
        <namePart type="family">Varol</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-370-8</identifier>
    </relatedItem>

    <abstract>Retrieval-Augmented Generation (RAG) enhances LLM factuality, yet design guidance remains English-centric, limiting insights for morphologically rich languages like Turkish. We address this by constructing a comprehensive Turkish RAG dataset derived from Turkish Wikipedia and CulturaX, comprising question-answer pairs and relevant passage chunks. We benchmark seven stages of the RAG pipeline—from query transformation and reranking to answer refinement—without task-specific fine-tuning. Our results show that complex methods like HyDE maximize accuracy (85%) that is considerably higher than the baseline (78.70%). Also a Pareto-optimal configuration using Cross-encoder Reranking and Context Augmentation achieves comparable performance (84.60%) with much lower cost. We further demonstrate that over-stacking generative modules can degrade performance by distorting morphological cues, whereas simple query clarification with robust reranking offers an effective solution.</abstract>
    <identifier type="citekey">kose-etal-2026-ragturk</identifier>
    <location>
      <url>https://aclanthology.org/2026.sigturk-1.15/</url>
    </location>

    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>179</start>
        <end>196</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T RAGTurk: Best Practices for Retrieval Augmented Generation in Turkish
%A Köse, Süha Kağan
%A Baytekin, Mehmet Can
%A Aktaş, Burak
%A Görür, Bilge Kaan
%A Munis, Evren Ayberk
%A Yılmaz, Deniz
%A Kartal, Muhammed Yusuf
%A Toraman, Cagri
%Y Oflazer, Kemal
%Y Köksal, Abdullatif
%Y Varol, Onur
%S Proceedings of the Second Workshop Natural Language Processing for Turkic Languages (SIGTURK 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-370-8
%F kose-etal-2026-ragturk
%X Retrieval-Augmented Generation (RAG) enhances LLM factuality, yet design guidance remains English-centric, limiting insights for morphologically rich languages like Turkish. We address this by constructing a comprehensive Turkish RAG dataset derived from Turkish Wikipedia and CulturaX, comprising question-answer pairs and relevant passage chunks. We benchmark seven stages of the RAG pipeline—from query transformation and reranking to answer refinement—without task-specific fine-tuning. Our results show that complex methods like HyDE maximize accuracy (85%) that is considerably higher than the baseline (78.70%). Also a Pareto-optimal configuration using Cross-encoder Reranking and Context Augmentation achieves comparable performance (84.60%) with much lower cost. We further demonstrate that over-stacking generative modules can degrade performance by distorting morphological cues, whereas simple query clarification with robust reranking offers an effective solution.
%U https://aclanthology.org/2026.sigturk-1.15/
%P 179-196
Markdown (Informal)
[RAGTurk: Best Practices for Retrieval Augmented Generation in Turkish](https://aclanthology.org/2026.sigturk-1.15/) (Köse et al., SIGTURK 2026)
ACL
- Süha Kağan Köse, Mehmet Can Baytekin, Burak Aktaş, Bilge Kaan Görür, Evren Ayberk Munis, Deniz Yılmaz, Muhammed Yusuf Kartal, and Cagri Toraman. 2026. RAGTurk: Best Practices for Retrieval Augmented Generation in Turkish. In Proceedings of the Second Workshop Natural Language Processing for Turkic Languages (SIGTURK 2026), pages 179–196, Rabat, Morocco. Association for Computational Linguistics.