% ACL Anthology record for a shared-task system paper (AMIYA, closed data track).
% Acronyms and proper nouns in the title are brace-protected as whole words so
% that sentence-casing bibliography styles keep their capitalisation.
@inproceedings{gaber-etal-2026-mbzuai,
  title     = {{MBZUAI} at {AMIYA} Shared Task 2026: Adapting Open-Source {LLMs} for Dialectal {Arabic}},
  author    = {Gaber, Rana and
               Allam, Yara and
               Amin, Serag and
               Aly, Ranwa and
               Alhafni, Bashar},
  booktitle = {Proceedings of the 13th Workshop on {NLP} for Similar Languages, Varieties and Dialects},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.vardial-1.31/},
  pages     = {373--384},
  abstract  = {This paper presents our contribution to the closed data track of the AMIYA Shared Task on Dialectal Arabic text generation. In this track, we train fully open-source Large Language Models (LLMs) on five Arabic dialects: Egyptian, Moroccan, Palestinian, Saudi, and Syrian, using the provided training datasets. We experiment with different base and instruct models using several pretraining and instruction tuning approaches. In total, five models were submitted, with three variants per dialect. Our best-performing models for the five dialects are ALLaM for Egyptian, LLaMa for Moroccan and Palestinian, and Aya for Saudi and Syrian.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gaber-etal-2026-mbzuai">
<titleInfo>
<title>MBZUAI at AMIYA Shared Task 2026: Adapting Open-Source LLMs for Dialectal Arabic</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rana</namePart>
<namePart type="family">Gaber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yara</namePart>
<namePart type="family">Allam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Serag</namePart>
<namePart type="family">Amin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ranwa</namePart>
<namePart type="family">Aly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bashar</namePart>
<namePart type="family">Alhafni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents our contribution to the closed data track of the AMIYA Shared Task on Dialectal Arabic text generation. In this track, we train fully open-source Large Language Models (LLMs) on five Arabic dialects: Egyptian, Moroccan, Palestinian, Saudi, and Syrian, using the provided training datasets. We experiment with different base and instruct models using several pretraining and instruction tuning approaches. In total, five models were submitted, with three variants per dialect. Our best-performing models for the five dialects are ALLaM for Egyptian, LLaMa for Moroccan and Palestinian, and Aya for Saudi and Syrian.</abstract>
<identifier type="citekey">gaber-etal-2026-mbzuai</identifier>
<location>
<url>https://aclanthology.org/2026.vardial-1.31/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>373</start>
<end>384</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MBZUAI at AMIYA Shared Task 2026: Adapting Open-Source LLMs for Dialectal Arabic
%A Gaber, Rana
%A Allam, Yara
%A Amin, Serag
%A Aly, Ranwa
%A Alhafni, Bashar
%S Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F gaber-etal-2026-mbzuai
%X This paper presents our contribution to the closed data track of the AMIYA Shared Task on Dialectal Arabic text generation. In this track, we train fully open-source Large Language Models (LLMs) on five Arabic dialects: Egyptian, Moroccan, Palestinian, Saudi, and Syrian, using the provided training datasets. We experiment with different base and instruct models using several pretraining and instruction tuning approaches. In total, five models were submitted, with three variants per dialect. Our best-performing models for the five dialects are ALLaM for Egyptian, LLaMa for Moroccan and Palestinian, and Aya for Saudi and Syrian.
%U https://aclanthology.org/2026.vardial-1.31/
%P 373-384
Markdown (Informal)
[MBZUAI at AMIYA Shared Task 2026: Adapting Open-Source LLMs for Dialectal Arabic](https://aclanthology.org/2026.vardial-1.31/) (Gaber et al., VarDial 2026)
ACL