@inproceedings{maheshwari-etal-2026-improving,
title = "Improving Dialect Robustness in Large Language Models via {L}o{RA} and Mixture-of-Experts",
author = "Maheshwari, Sanjh and
Rajpoot, Aniket Singh and
Cocarascu, Oana and
., Mamta",
booktitle = "Proceedings of the 13th Workshop on {NLP} for Similar Languages, Varieties and Dialects",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.vardial-1.24/",
pages = "293--303",
abstract = "Despite the success of large language models (LLMs) in a wide range of applications, it has been shown that their performance varies across English dialects. Differences among English dialects are reflected in vocabulary, syntax, and writing style, and can adversely affect model performance. Several studies evaluate the dialect robustness of LLMs, yet research on enhancing their robustness to dialectal variation remains limited. In this paper, we propose two parameter-efficient frameworks for improving dialectal robustness in LLMs: DialectFusion where we train separate LoRA layers for each dialect and apply different LoRA merging methods, and DialectMoE which is built on top of Mixture of Experts LoRA and introduces multiple LoRA-based experts to the feed-forward layer to internally model the dialectal dependencies. Our comprehensive analysis on five open-source LLMs for sentiment and sarcasm tasks in zero- and few-shot settings shows that our proposed approaches enhance the dialect robustness of LLMs and outperforms instruct and LoRA fine-tuning based approaches."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="maheshwari-etal-2026-improving">
<titleInfo>
<title>Improving Dialect Robustness in Large Language Models via LoRA and Mixture-of-Experts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sanjh</namePart>
<namePart type="family">Maheshwari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aniket</namePart>
<namePart type="given">Singh</namePart>
<namePart type="family">Rajpoot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oana</namePart>
<namePart type="family">Cocarascu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mamta</namePart>
<namePart type="family">.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite the success of large language models (LLMs) in a wide range of applications, it has been shown that their performance varies across English dialects. Differences among English dialects are reflected in vocabulary, syntax, and writing style, and can adversely affect model performance. Several studies evaluate the dialect robustness of LLMs, yet research on enhancing their robustness to dialectal variation remains limited. In this paper, we propose two parameter-efficient frameworks for improving dialectal robustness in LLMs: DialectFusion, where we train separate LoRA layers for each dialect and apply different LoRA merging methods, and DialectMoE, which is built on top of Mixture-of-Experts LoRA and introduces multiple LoRA-based experts into the feed-forward layer to internally model dialectal dependencies. Our comprehensive analysis of five open-source LLMs on sentiment and sarcasm tasks in zero- and few-shot settings shows that our proposed approaches enhance the dialect robustness of LLMs and outperform instruct and LoRA fine-tuning-based approaches.</abstract>
<identifier type="citekey">maheshwari-etal-2026-improving</identifier>
<location>
<url>https://aclanthology.org/2026.vardial-1.24/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>293</start>
<end>303</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving Dialect Robustness in Large Language Models via LoRA and Mixture-of-Experts
%A Maheshwari, Sanjh
%A Rajpoot, Aniket Singh
%A Cocarascu, Oana
%A ., Mamta
%S Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F maheshwari-etal-2026-improving
%X Despite the success of large language models (LLMs) in a wide range of applications, it has been shown that their performance varies across English dialects. Differences among English dialects are reflected in vocabulary, syntax, and writing style, and can adversely affect model performance. Several studies evaluate the dialect robustness of LLMs, yet research on enhancing their robustness to dialectal variation remains limited. In this paper, we propose two parameter-efficient frameworks for improving dialectal robustness in LLMs: DialectFusion, where we train separate LoRA layers for each dialect and apply different LoRA merging methods, and DialectMoE, which is built on top of Mixture-of-Experts LoRA and introduces multiple LoRA-based experts into the feed-forward layer to internally model dialectal dependencies. Our comprehensive analysis of five open-source LLMs on sentiment and sarcasm tasks in zero- and few-shot settings shows that our proposed approaches enhance the dialect robustness of LLMs and outperform instruct and LoRA fine-tuning-based approaches.
%U https://aclanthology.org/2026.vardial-1.24/
%P 293-303
Markdown (Informal)
[Improving Dialect Robustness in Large Language Models via LoRA and Mixture-of-Experts](https://aclanthology.org/2026.vardial-1.24/) (Maheshwari et al., VarDial 2026)
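
The abstract describes DialectFusion as training one LoRA adapter per dialect and then merging the adapters. As a rough illustration of that general idea only (not the authors' implementation), the sketch below shows a weighted-average merge of per-dialect low-rank updates in plain PyTorch; the names `DialectLoRA` and `merge_lora_deltas`, the rank, and the averaging weights are hypothetical, and the paper itself compares several merging methods.

```python
# Minimal sketch of per-dialect LoRA adapters and a weighted-average merge.
# Illustrative only; names and hyperparameters are assumptions, not from the paper.
import torch


class DialectLoRA(torch.nn.Module):
    """One low-rank adapter (delta_W = B @ A), nominally trained on a single dialect."""

    def __init__(self, d_in: int, d_out: int, rank: int = 8):
        super().__init__()
        self.A = torch.nn.Parameter(torch.randn(rank, d_in) * 0.01)
        self.B = torch.nn.Parameter(torch.zeros(d_out, rank))

    def delta(self) -> torch.Tensor:
        # Full-rank view of the low-rank update.
        return self.B @ self.A


def merge_lora_deltas(adapters, weights=None) -> torch.Tensor:
    """Merge per-dialect LoRA updates by a weighted average (one simple strategy)."""
    if weights is None:
        weights = [1.0 / len(adapters)] * len(adapters)
    return sum(w * a.delta() for w, a in zip(weights, adapters))


# Usage: merge adapters for two dialects and fold the result into a frozen base weight.
base_weight = torch.randn(768, 768)                        # frozen base projection
adapters = [DialectLoRA(768, 768), DialectLoRA(768, 768)]  # one adapter per dialect
merged_weight = base_weight + merge_lora_deltas(adapters)
```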