@inproceedings{nosenko-kilko-2026-rag,
title = "{RAG} Pipeline Strategies for {U}krainian Multi-Domain Document Understanding Task",
author = "Nosenko, Mykola and
Kilko, Pavlo",
editor = "Romanyshyn, Mariana",
booktitle = "Proceedings of the Fifth {U}krainian Natural Language Processing Conference ({UNLP} 2026)",
month = may,
year = "2026",
address = "Lviv, Ukraine",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.unlp-1.21/",
pages = "240--248",
ISBN = "979-8-89176-359-3",
abstract = "In this work, we present a top-performing solution to the UNLP 2026 Shared Task on Ukrainian Multi-Domain Document Understanding. This task focuses on answering multiple-choice questions grounded in domain-specific Ukrainian documents, while also requiring systems to identify the source document and page. We developed a modular retrieval-augmented generation (RAG) pipeline and conducted a series of ablation experiments over its individual components to identify the best-performing strategy at each stage. Based on our evaluation results, we propose two final pipeline configurations that differ in their computational cost and retrieval accuracy: a stronger but more compute-intensive document-level augmentation approach and a lighter summary-based augmentation that is suitable for constrained environments. Our submission achieved 3rd place on the private leaderboard. This demonstrates that isolated curation of RAG components can yield strong performance for Ukrainian document-grounded question answering without additional language model adaptations."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nosenko-kilko-2026-rag">
<titleInfo>
<title>RAG Pipeline Strategies for Ukrainian Multi-Domain Document Understanding Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mykola</namePart>
<namePart type="family">Nosenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pavlo</namePart>
<namePart type="family">Kilko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Ukrainian Natural Language Processing Conference (UNLP 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mariana</namePart>
<namePart type="family">Romanyshyn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Lviv, Ukraine</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-359-3</identifier>
</relatedItem>
<abstract>In this work, we present a top-performing solution to the UNLP 2026 Shared Task on Ukrainian Multi-Domain Document Understanding. This task focuses on answering multiple-choice questions grounded in domain-specific Ukrainian documents, while also requiring systems to identify the source document and page. We developed a modular retrieval-augmented generation (RAG) pipeline and conducted a series of ablation experiments over its individual components to identify the best-performing strategy at each stage. Based on our evaluation results, we propose two final pipeline configurations that differ in their computational cost and retrieval accuracy: a stronger but more compute-intensive document-level augmentation approach and a lighter summary-based augmentation that is suitable for constrained environments. Our submission achieved 3rd place on the private leaderboard. This demonstrates that isolated curation of RAG components can yield strong performance for Ukrainian document-grounded question answering without additional language model adaptations.</abstract>
<identifier type="citekey">nosenko-kilko-2026-rag</identifier>
<location>
<url>https://aclanthology.org/2026.unlp-1.21/</url>
</location>
<part>
<date>2026-05</date>
<extent unit="page">
<start>240</start>
<end>248</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RAG Pipeline Strategies for Ukrainian Multi-Domain Document Understanding Task
%A Nosenko, Mykola
%A Kilko, Pavlo
%Y Romanyshyn, Mariana
%S Proceedings of the Fifth Ukrainian Natural Language Processing Conference (UNLP 2026)
%D 2026
%8 May
%I Association for Computational Linguistics
%C Lviv, Ukraine
%@ 979-8-89176-359-3
%F nosenko-kilko-2026-rag
%X In this work, we present a top-performing solution to the UNLP 2026 Shared Task on Ukrainian Multi-Domain Document Understanding. This task focuses on answering multiple-choice questions grounded in domain-specific Ukrainian documents, while also requiring systems to identify the source document and page. We developed a modular retrieval-augmented generation (RAG) pipeline and conducted a series of ablation experiments over its individual components to identify the best-performing strategy at each stage. Based on our evaluation results, we propose two final pipeline configurations that differ in their computational cost and retrieval accuracy: a stronger but more compute-intensive document-level augmentation approach and a lighter summary-based augmentation that is suitable for constrained environments. Our submission achieved 3rd place on the private leaderboard. This demonstrates that isolated curation of RAG components can yield strong performance for Ukrainian document-grounded question answering without additional language model adaptations.
%U https://aclanthology.org/2026.unlp-1.21/
%P 240-248
Markdown (Informal)
[RAG Pipeline Strategies for Ukrainian Multi-Domain Document Understanding Task](https://aclanthology.org/2026.unlp-1.21/) (Nosenko & Kilko, UNLP 2026)
ACL