% Workshop paper; ACL Anthology record 2026.silkroadnlp-1.10 (see the url field).
% Braces in title/booktitle protect whole words whose capitalisation must
% survive sentence-casing bibliography styles.
@inproceedings{arnob-mahi-2026-one,
    title     = {One Language, Three of Its Voices: Evaluating Multilingual {LLMs} Across {Persian}, {Dari}, and {Tajiki} on Translation and Understanding Tasks},
    author    = {Arnob, Noor Mairukh Khan and
                 Mahi, Abu Bakar Siddique},
    editor    = {Merchant, Rayyan and
                 Megerdoomian, Karine},
    booktitle = {The Proceedings of the First Workshop on {NLP} and {LLMs} for the {Iranian} Language Family},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.silkroadnlp-1.10/},
    pages     = {98--104},
    isbn      = {979-8-89176-371-5},
    abstract  = {The Iranian linguistic family is pluricentric, encompassing Iranian Persian, Dari (Afghanistan), and Tajiki (Tajikistan). While Multilingual Large Language Models (MLLMs) claim broad coverage, their robustness across these regional variants and script differences (Perso-Arabic vs. Cyrillic) remains under-explored, particularly in the open-weight landscape. We evaluate five open-weight models from the Qwen, Bloomz, and Gemma families across four downstream tasks: Sentiment Analysis, Machine Translation (MT), NLI, and QA. Utilizing a dataset of over 240,000 processed samples, we observe severe performance disparities. While the fine-tuned gemma-3-4b-persian achieves promising results on Iranian Persian (77.3{\%} accuracy in Sentiment), almost all tested models appear to suffer catastrophic degradation on Tajiki script (dropping to 1.0 BLEU). These findings highlight a critical ``script barrier'' in current open-weight MLLM development for Central Asian languages. Code and data available here.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey arnob-mahi-2026-one.
     The top-level elements describe the paper itself; the relatedItem
     of type "host" describes the proceedings volume that contains it. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="arnob-mahi-2026-one">
    <titleInfo>
      <title>One Language, Three of Its Voices: Evaluating Multilingual LLMs Across Persian, Dari, and Tajiki on Translation and Understanding Tasks</title>
    </titleInfo>
    <!-- Authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Noor</namePart>
      <namePart type="given">Mairukh</namePart>
      <namePart type="given">Khan</namePart>
      <namePart type="family">Arnob</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Abu</namePart>
      <namePart type="given">Bakar</namePart>
      <namePart type="given">Siddique</namePart>
      <namePart type="family">Mahi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>The Proceedings of the First Workshop on NLP and LLMs for the Iranian Language Family</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Rayyan</namePart>
        <namePart type="family">Merchant</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Karine</namePart>
        <namePart type="family">Megerdoomian</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-371-5</identifier>
    </relatedItem>
    <abstract>The Iranian linguistic family is pluricentric, encompassing Iranian Persian, Dari (Afghanistan), and Tajiki (Tajikistan). While Multilingual Large Language Models (MLLMs) claim broad coverage, their robustness across these regional variants and script differences (Perso-Arabic vs. Cyrillic) remains under-explored, particularly in the open-weight landscape. We evaluate five open-weight models from the Qwen, Bloomz, and Gemma families across four downstream tasks: Sentiment Analysis, Machine Translation (MT), NLI, and QA. Utilizing a dataset of over 240,000 processed samples, we observe severe performance disparities. While the fine-tuned gemma-3-4b-persian achieves promising results on Iranian Persian (77.3% accuracy in Sentiment), almost all tested models appear to suffer catastrophic degradation on Tajiki script (dropping to 1.0 BLEU). These findings highlight a critical “script barrier” in current open-weight MLLM development for Central Asian languages. Code and data available here.</abstract>
    <identifier type="citekey">arnob-mahi-2026-one</identifier>
    <location>
      <url>https://aclanthology.org/2026.silkroadnlp-1.10/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>98</start>
        <end>104</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T One Language, Three of Its Voices: Evaluating Multilingual LLMs Across Persian, Dari, and Tajiki on Translation and Understanding Tasks
%A Arnob, Noor Mairukh Khan
%A Mahi, Abu Bakar Siddique
%Y Merchant, Rayyan
%Y Megerdoomian, Karine
%S The Proceedings of the First Workshop on NLP and LLMs for the Iranian Language Family
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-371-5
%F arnob-mahi-2026-one
%X The Iranian linguistic family is pluricentric, encompassing Iranian Persian, Dari (Afghanistan), and Tajiki (Tajikistan). While Multilingual Large Language Models (MLLMs) claim broad coverage, their robustness across these regional variants and script differences (Perso-Arabic vs. Cyrillic) remains under-explored, particularly in the open-weight landscape. We evaluate five open-weight models from the Qwen, Bloomz, and Gemma families across four downstream tasks: Sentiment Analysis, Machine Translation (MT), NLI, and QA. Utilizing a dataset of over 240,000 processed samples, we observe severe performance disparities. While the fine-tuned gemma-3-4b-persian achieves promising results on Iranian Persian (77.3% accuracy in Sentiment), almost all tested models appear to suffer catastrophic degradation on Tajiki script (dropping to 1.0 BLEU). These findings highlight a critical “script barrier” in current open-weight MLLM development for Central Asian languages. Code and data available here.
%U https://aclanthology.org/2026.silkroadnlp-1.10/
%P 98-104
Markdown (Informal)
[One Language, Three of Its Voices: Evaluating Multilingual LLMs Across Persian, Dari, and Tajiki on Translation and Understanding Tasks](https://aclanthology.org/2026.silkroadnlp-1.10/) (Arnob & Mahi, SilkRoadNLP 2026)
ACL