@inproceedings{ipa-etal-2025-empowering,
title = "Empowering Low-Resource Languages: {T}ra{S}e Architecture for Enhanced Retrieval-Augmented Generation in {B}angla",
author = "Ipa, Atia Shahnaz and
Rony, Mohammad Abu Tareq and
Islam, Mohammad Shariful",
editor = "Truong, Sang and
Putri, Rifki Afina and
Nguyen, Duc and
Wang, Angelina and
Ho, Daniel and
Oh, Alice and
Koyejo, Sanmi",
booktitle = "Proceedings of the 1st Workshop on Language Models for Underserved Communities (LM4UC 2025)",
month = may,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.lm4uc-1.2/",
doi = "10.18653/v1/2025.lm4uc-1.2",
pages = "8--15",
ISBN = "979-8-89176-242-8",
abstract = "Research on Retrieval-Augmented Generation for low-resource languages has been sparse because of limited resources. To address this, we focus on Bangla, a low-resource language, and have created a dataset of 200 question-answer pairs as a basis for our study from Bangla Wikipedia dumps data. This paper introduces the TraSe architecture, which enhances RAG for Bangla using Translative prompting. Our experiments demonstrate that TraSe improves answer selection accuracy, achieving 34{\%} with automatic retrieval and 63{\%} with Human-in-the-Loop retrieval, outperforming baseline methods. The TraSe architecture marks a significant advancement in RAG for low-resource languages and has the potential to enhance question-answering systems for Bangla and similar languages. Future research could explore additional low-resource languages. The code is available at the following GitHub repository: https://github.com/Atia6/TraSe-Bangla-RAG."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ipa-etal-2025-empowering">
<titleInfo>
<title>Empowering Low-Resource Languages: TraSe Architecture for Enhanced Retrieval-Augmented Generation in Bangla</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atia</namePart>
<namePart type="given">Shahnaz</namePart>
<namePart type="family">Ipa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Abu</namePart>
<namePart type="given">Tareq</namePart>
<namePart type="family">Rony</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Shariful</namePart>
<namePart type="family">Islam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Language Models for Underserved Communities (LM4UC 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sang</namePart>
<namePart type="family">Truong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rifki</namePart>
<namePart type="given">Afina</namePart>
<namePart type="family">Putri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Duc</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angelina</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Ho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alice</namePart>
<namePart type="family">Oh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanmi</namePart>
<namePart type="family">Koyejo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-242-8</identifier>
</relatedItem>
<abstract>Research on Retrieval-Augmented Generation for low-resource languages has been sparse because of limited resources. To address this, we focus on Bangla, a low-resource language, and have created a dataset of 200 question-answer pairs as a basis for our study from Bangla Wikipedia dumps data. This paper introduces the TraSe architecture, which enhances RAG for Bangla using Translative prompting. Our experiments demonstrate that TraSe improves answer selection accuracy, achieving 34% with automatic retrieval and 63% with Human-in-the-Loop retrieval, outperforming baseline methods. The TraSe architecture marks a significant advancement in RAG for low-resource languages and has the potential to enhance question-answering systems for Bangla and similar languages. Future research could explore additional low-resource languages. The code is available at the following GitHub repository: https://github.com/Atia6/TraSe-Bangla-RAG.</abstract>
<identifier type="citekey">ipa-etal-2025-empowering</identifier>
<identifier type="doi">10.18653/v1/2025.lm4uc-1.2</identifier>
<location>
<url>https://aclanthology.org/2025.lm4uc-1.2/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>8</start>
<end>15</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Empowering Low-Resource Languages: TraSe Architecture for Enhanced Retrieval-Augmented Generation in Bangla
%A Ipa, Atia Shahnaz
%A Rony, Mohammad Abu Tareq
%A Islam, Mohammad Shariful
%Y Truong, Sang
%Y Putri, Rifki Afina
%Y Nguyen, Duc
%Y Wang, Angelina
%Y Ho, Daniel
%Y Oh, Alice
%Y Koyejo, Sanmi
%S Proceedings of the 1st Workshop on Language Models for Underserved Communities (LM4UC 2025)
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-242-8
%F ipa-etal-2025-empowering
%X Research on Retrieval-Augmented Generation for low-resource languages has been sparse because of limited resources. To address this, we focus on Bangla, a low-resource language, and have created a dataset of 200 question-answer pairs as a basis for our study from Bangla Wikipedia dumps data. This paper introduces the TraSe architecture, which enhances RAG for Bangla using Translative prompting. Our experiments demonstrate that TraSe improves answer selection accuracy, achieving 34% with automatic retrieval and 63% with Human-in-the-Loop retrieval, outperforming baseline methods. The TraSe architecture marks a significant advancement in RAG for low-resource languages and has the potential to enhance question-answering systems for Bangla and similar languages. Future research could explore additional low-resource languages. The code is available at the following GitHub repository: https://github.com/Atia6/TraSe-Bangla-RAG.
%R 10.18653/v1/2025.lm4uc-1.2
%U https://aclanthology.org/2025.lm4uc-1.2/
%U https://doi.org/10.18653/v1/2025.lm4uc-1.2
%P 8-15
Markdown (Informal)
[Empowering Low-Resource Languages: TraSe Architecture for Enhanced Retrieval-Augmented Generation in Bangla](https://aclanthology.org/2025.lm4uc-1.2/) (Ipa et al., LM4UC 2025)
ACL