@inproceedings{gollapalli-etal-2026-nus,
title = "{NUS}-{IDS} at {AMIYA}/{V}ar{D}ial 2026: Improving {A}rabic Dialectness in {LLM}s with Reinforcement Learning",
author = "Gollapalli, Sujatha Das and
Hakam, Mouad and
Du, Mingzhe and
Ng, See-Kiong",
booktitle = "Proceedings of the 13th Workshop on {NLP} for Similar Languages, Varieties and Dialects",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.vardial-1.30/",
pages = "365--372",
  abstract = "In this paper, we describe models developed by our team, NUS-IDS, for the Closed data track at the Arabic Modeling In Your Accent (AMIYA) shared task at VarDial 2026. The core idea behind our solution involves data augmentation enabled by a dialect classifier trained on AMIYA data. We effectively combine various translation, summarization, and question answering prompts with AMIYA training data to form dialectal prompts for use with state-of-the-art LLMs. Next, dialect predictions from our classifier on outputs from these LLMs are used to compile preference data for Reinforcement Learning (RL). We report model performance on dialectal Arabic from Egypt, Morocco, Palestine, Saudi Arabia and Syria using FLORES+, a multilingual machine translation dataset. Our experiments illustrate that though our RL models show significant performance gains on dialectness scores, they underperform on translation metrics such as chrF++ compared to base LLMs."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gollapalli-etal-2026-nus">
<titleInfo>
<title>NUS-IDS at AMIYA/VarDial 2026: Improving Arabic Dialectness in LLMs with Reinforcement Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sujatha</namePart>
<namePart type="given">Das</namePart>
<namePart type="family">Gollapalli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mouad</namePart>
<namePart type="family">Hakam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mingzhe</namePart>
<namePart type="family">Du</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">See-Kiong</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
  <abstract>In this paper, we describe models developed by our team, NUS-IDS, for the Closed data track at the Arabic Modeling In Your Accent (AMIYA) shared task at VarDial 2026. The core idea behind our solution involves data augmentation enabled by a dialect classifier trained on AMIYA data. We effectively combine various translation, summarization, and question answering prompts with AMIYA training data to form dialectal prompts for use with state-of-the-art LLMs. Next, dialect predictions from our classifier on outputs from these LLMs are used to compile preference data for Reinforcement Learning (RL). We report model performance on dialectal Arabic from Egypt, Morocco, Palestine, Saudi Arabia and Syria using FLORES+, a multilingual machine translation dataset. Our experiments illustrate that though our RL models show significant performance gains on dialectness scores, they underperform on translation metrics such as chrF++ compared to base LLMs.</abstract>
<identifier type="citekey">gollapalli-etal-2026-nus</identifier>
<location>
<url>https://aclanthology.org/2026.vardial-1.30/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>365</start>
<end>372</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T NUS-IDS at AMIYA/VarDial 2026: Improving Arabic Dialectness in LLMs with Reinforcement Learning
%A Gollapalli, Sujatha Das
%A Hakam, Mouad
%A Du, Mingzhe
%A Ng, See-Kiong
%S Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F gollapalli-etal-2026-nus
%X In this paper, we describe models developed by our team, NUS-IDS, for the Closed data track at the Arabic Modeling In Your Accent (AMIYA) shared task at VarDial 2026. The core idea behind our solution involves data augmentation enabled by a dialect classifier trained on AMIYA data. We effectively combine various translation, summarization, and question answering prompts with AMIYA training data to form dialectal prompts for use with state-of-the-art LLMs. Next, dialect predictions from our classifier on outputs from these LLMs are used to compile preference data for Reinforcement Learning (RL). We report model performance on dialectal Arabic from Egypt, Morocco, Palestine, Saudi Arabia and Syria using FLORES+, a multilingual machine translation dataset. Our experiments illustrate that though our RL models show significant performance gains on dialectness scores, they underperform on translation metrics such as chrF++ compared to base LLMs.
%U https://aclanthology.org/2026.vardial-1.30/
%P 365-372
Markdown (Informal)
[NUS-IDS at AMIYA/VarDial 2026: Improving Arabic Dialectness in LLMs with Reinforcement Learning](https://aclanthology.org/2026.vardial-1.30/) (Gollapalli et al., VarDial 2026)
ACL