@inproceedings{gumma-etal-2025-towards,
title = "Towards Inducing Long-Context Abilities in Multilingual Neural Machine Translation Models",
author = "Gumma, Varun and
Chitale, Pranjal A and
Bali, Kalika",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.naacl-long.366/",
doi = "10.18653/v1/2025.naacl-long.366",
pages = "7158--7170",
ISBN = "979-8-89176-189-6",
abstract = "Neural Machine Translation (NMT) models have traditionally used Sinusoidal Positional Embeddings (PEs), which often struggle to capture long-range dependencies and are inefficient for handling extended context or document-level translation tasks. This work addresses the challenge of transitioning pre-trained NMT models from absolute Sinusoidal PEs to Relative PEs, such as RoPE and ALiBi, without compromising performance. We demonstrate that parameter-efficient fine-tuning, using only a small amount of high-quality data, can successfully facilitate this transition. Experimental results indicate that switching from Sinusoidal to Relative PEs results in competitive translation quality on sentence-level evaluation benchmarks. Additionally, models trained with RoPE consistently outperform those using ALiBi and Sinusoidal PEs on document-level benchmarks across both string-based metrics and qualitative evaluations. Moreover, we find that a small amount of long-context data in a few languages is sufficient for cross-lingual length generalization, thereby inducing long-context capabilities."
}