% BibTeX record for the ACL Anthology paper 2026.silkroadnlp-1.14 (paper in workshop proceedings).
@inproceedings{ansari-etal-2026-multi,
  title     = {Multi-modal Neural Machine Translation for Low-Resource Classical {Persian} Poetry: A Culture-Aware Evaluation},
  author    = {Ansari, Soheila and
               Boukadoum, Mounir and
               Sadat, Fatiha},
  editor    = {Merchant, Rayyan and
               Megerdoomian, Karine},
  booktitle = {The Proceedings of the First Workshop on {NLP} and {LLMs} for the {Iranian} Language Family},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.silkroadnlp-1.14/},
  pages     = {131--139},
  isbn      = {979-8-89176-371-5},
  abstract  = {Persian poetry, particularly Rumi{'}s Masnaviye-Ma{'}navi, is known for its complex form, mystical narrative style, rich cultural information, and linguistic nuances, and is considered a low-resource domain. Translating Persian poetry is a challenging task for neural machine translation (NMT) systems. To address this challenge, we present a novel multimodal NMT system for Rumi{'}s Masnavi in four stages. First, we built a new multi-modal parallel Persian-English corpus of 26,571 aligned verses from all six books of Masnavi, and all paired with aligned audio recitations. Second, a strong text-only baseline is developed by applying domain-adaptive fine-tuning to mBART-50, pre-trained on a large monolingual Persian poetry corpus, followed by training on the parallel Masnavi corpus (train set). Third, we extend this model to a multi-modal scenario by adding aligned audio representations using a cross-attention fusion mechanism. Fourth, we conduct a culture-aware evaluation. We propose a culture-specific item (CSI) evaluation approach by developing a CSI classification system and a Persian-English CSI dictionary alongside the standard MT metrics. Our findings demonstrate that integrating audio recitations increased the BLEU score from 9.85 to 17.95, and raised CSI-recall from 61.60{\%} to 82.04{\%}, suggesting greater consistency in producing culturally meaningful terms.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey ansari-etal-2026-multi: a paper whose host item is a conference publication. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ansari-etal-2026-multi">
    <titleInfo>
      <title>Multi-modal Neural Machine Translation for Low-Resource Classical Persian Poetry: A Culture-Aware Evaluation</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Soheila</namePart>
      <namePart type="family">Ansari</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mounir</namePart>
      <namePart type="family">Boukadoum</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fatiha</namePart>
      <namePart type="family">Sadat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>The Proceedings of the First Workshop on NLP and LLMs for the Iranian Language Family</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Rayyan</namePart>
        <namePart type="family">Merchant</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Karine</namePart>
        <namePart type="family">Megerdoomian</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-371-5</identifier>
    </relatedItem>
    <abstract>Persian poetry, particularly Rumi’s Masnaviye-Ma’navi, is known for its complex form, mystical narrative style, rich cultural information, and linguistic nuances, and is considered a low-resource domain. Translating Persian poetry is a challenging task for neural machine translation (NMT) systems. To address this challenge, we present a novel multimodal NMT system for Rumi’s Masnavi in four stages. First, we built a new multi-modal parallel Persian-English corpus of 26,571 aligned verses from all six books of Masnavi, and all paired with aligned audio recitations. Second, a strong text-only baseline is developed by applying domain-adaptive fine-tuning to mBART-50, pre-trained on a large monolingual Persian poetry corpus, followed by training on the parallel Masnavi corpus (train set). Third, we extend this model to a multi-modal scenario by adding aligned audio representations using a cross-attention fusion mechanism. Fourth, we conduct a culture-aware evaluation. We propose a culture-specific item (CSI) evaluation approach by developing a CSI classification system and a Persian-English CSI dictionary alongside the standard MT metrics. Our findings demonstrate that integrating audio recitations increased the BLEU score from 9.85 to 17.95, and raised CSI-recall from 61.60% to 82.04%, suggesting greater consistency in producing culturally meaningful terms.</abstract>
    <identifier type="citekey">ansari-etal-2026-multi</identifier>
    <location>
      <url>https://aclanthology.org/2026.silkroadnlp-1.14/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>131</start>
        <end>139</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-modal Neural Machine Translation for Low-Resource Classical Persian Poetry: A Culture-Aware Evaluation
%A Ansari, Soheila
%A Boukadoum, Mounir
%A Sadat, Fatiha
%Y Merchant, Rayyan
%Y Megerdoomian, Karine
%S The Proceedings of the First Workshop on NLP and LLMs for the Iranian Language Family
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-371-5
%F ansari-etal-2026-multi
%X Persian poetry, particularly Rumi’s Masnaviye-Ma’navi, is known for its complex form, mystical narrative style, rich cultural information, and linguistic nuances, and is considered a low-resource domain. Translating Persian poetry is a challenging task for neural machine translation (NMT) systems. To address this challenge, we present a novel multimodal NMT system for Rumi’s Masnavi in four stages. First, we built a new multi-modal parallel Persian-English corpus of 26,571 aligned verses from all six books of Masnavi, and all paired with aligned audio recitations. Second, a strong text-only baseline is developed by applying domain-adaptive fine-tuning to mBART-50, pre-trained on a large monolingual Persian poetry corpus, followed by training on the parallel Masnavi corpus (train set). Third, we extend this model to a multi-modal scenario by adding aligned audio representations using a cross-attention fusion mechanism. Fourth, we conduct a culture-aware evaluation. We propose a culture-specific item (CSI) evaluation approach by developing a CSI classification system and a Persian-English CSI dictionary alongside the standard MT metrics. Our findings demonstrate that integrating audio recitations increased the BLEU score from 9.85 to 17.95, and raised CSI-recall from 61.60% to 82.04%, suggesting greater consistency in producing culturally meaningful terms.
%U https://aclanthology.org/2026.silkroadnlp-1.14/
%P 131-139
Markdown (Informal)
[Multi-modal Neural Machine Translation for Low-Resource Classical Persian Poetry: A Culture-Aware Evaluation](https://aclanthology.org/2026.silkroadnlp-1.14/) (Ansari et al., SilkRoadNLP 2026)
ACL