@inproceedings{prasanth-2026-translation,
title = "Translation-Augmented Multilingual Summarization for Low-Resource Languages",
author = "Prasanth",
editor = "Chakravarthi, Bharathi Raja and
B, Bharathi and
Buitelaar, Paul and
Thenmozhi, Durairaj and
Garc{\'i}a Cumbreras, Miguel {\'A}ngel and
Jim{\'e}nez Zafra, Salud Mar{\'i}a",
booktitle = "Proceedings of the Sixth Workshop on Language Technology for Equality, Diversity, Inclusion",
month = jul,
year = "2026",
address = "Virtual (Online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.ltedi-1.10/",
pages = "108--117",
ISBN = "979-8-89176-424-8",
abstract = "While automatic text summarization has achieved remarkable success in English, extending these capabilities to low-resource languages remains a significant challenge due to the scarcity of labeled training data. We propose a translation-augmented approach to multilingual summarization: we systematically translate high-quality English summarization corpora into low-resource target languages using NLLB-200, and use the resulting parallel data to train and evaluate sequence-to-sequence models. We experiment across three typologically diverse languages{---}Swahili, Hausa, and Afrikaans{---}comparing monolingual fine-tuning (MONO), cross-lingual transfer (XLT), and joint multilingual training (TAMT) on mBART-large-50. Monolingual fine-tuning achieves the best performance for Swahili (ROUGE-L 13.9) and Afrikaans (ROUGE-L 15.7), surpassing the Lead-3 baseline in both cases, while cross-lingual transfer remains strongest for Hausa (ROUGE-L 14.5). We show that native language token availability in mBART-50 is a critical determinant of fine-tuning performance, and characterize the conditions under which the theoretically expected TAMT $>$ MONO $>$ XLT ordering breaks down. We release our dataset, code, and evaluation infrastructure to support future research on low-resource multilingual summarization."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="prasanth-2026-translation">
<titleInfo>
<title>Translation-Augmented Multilingual Summarization for Low-Resource Languages</title>
</titleInfo>
<name>
<namePart>Prasanth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Workshop on Language Technology for Equality, Diversity, Inclusion</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="family">B</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Buitelaar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Durairaj</namePart>
<namePart type="family">Thenmozhi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miguel</namePart>
<namePart type="given">Ángel</namePart>
<namePart type="family">García Cumbreras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salud</namePart>
<namePart type="given">María</namePart>
<namePart type="family">Jiménez Zafra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Virtual (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-424-8</identifier>
</relatedItem>
<abstract>While automatic text summarization has achieved remarkable success in English, extending these capabilities to low-resource languages remains a significant challenge due to the scarcity of labeled training data. We propose a translation-augmented approach to multilingual summarization: we systematically translate high-quality English summarization corpora into low-resource target languages using NLLB-200, and use the resulting parallel data to train and evaluate sequence-to-sequence models. We experiment across three typologically diverse languages—Swahili, Hausa, and Afrikaans—comparing monolingual fine-tuning (MONO), cross-lingual transfer (XLT), and joint multilingual training (TAMT) on mBART-large-50. Monolingual fine-tuning achieves the best performance for Swahili (ROUGE-L 13.9) and Afrikaans (ROUGE-L 15.7), surpassing the Lead-3 baseline in both cases, while cross-lingual transfer remains strongest for Hausa (ROUGE-L 14.5). We show that native language token availability in mBART-50 is a critical determinant of fine-tuning performance, and characterize the conditions under which the theoretically expected TAMT > MONO > XLT ordering breaks down. We release our dataset, code, and evaluation infrastructure to support future research on low-resource multilingual summarization.</abstract>
<identifier type="citekey">prasanth-2026-translation</identifier>
<location>
<url>https://aclanthology.org/2026.ltedi-1.10/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>108</start>
<end>117</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Translation-Augmented Multilingual Summarization for Low-Resource Languages
%Y Chakravarthi, Bharathi Raja
%Y B, Bharathi
%Y Buitelaar, Paul
%Y Thenmozhi, Durairaj
%Y García Cumbreras, Miguel Ángel
%Y Jiménez Zafra, Salud María
%A Prasanth
%S Proceedings of the Sixth Workshop on Language Technology for Equality, Diversity, Inclusion
%D 2026
%8 July
%I Association for Computational Linguistics
%C Virtual (Online)
%@ 979-8-89176-424-8
%F prasanth-2026-translation
%X While automatic text summarization has achieved remarkable success in English, extending these capabilities to low-resource languages remains a significant challenge due to the scarcity of labeled training data. We propose a translation-augmented approach to multilingual summarization: we systematically translate high-quality English summarization corpora into low-resource target languages using NLLB-200, and use the resulting parallel data to train and evaluate sequence-to-sequence models. We experiment across three typologically diverse languages—Swahili, Hausa, and Afrikaans—comparing monolingual fine-tuning (MONO), cross-lingual transfer (XLT), and joint multilingual training (TAMT) on mBART-large-50. Monolingual fine-tuning achieves the best performance for Swahili (ROUGE-L 13.9) and Afrikaans (ROUGE-L 15.7), surpassing the Lead-3 baseline in both cases, while cross-lingual transfer remains strongest for Hausa (ROUGE-L 14.5). We show that native language token availability in mBART-50 is a critical determinant of fine-tuning performance, and characterize the conditions under which the theoretically expected TAMT > MONO > XLT ordering breaks down. We release our dataset, code, and evaluation infrastructure to support future research on low-resource multilingual summarization.
%U https://aclanthology.org/2026.ltedi-1.10/
%P 108-117
Markdown (Informal)
[Translation-Augmented Multilingual Summarization for Low-Resource Languages](https://aclanthology.org/2026.ltedi-1.10/) (Prasanth, LTEDI 2026)
ACL