@inproceedings{abdelghaffar-etal-2022-adapting,
title = "Adapting Large Multilingual Machine Translation Models to Unseen Low Resource Languages via Vocabulary Substitution and Neuron Selection",
author = "Abdelghaffar, Mohamed A and
El Mogy, Amr and
Sharaf, Nada Ahmed",
editor = "Duh, Kevin and
Guzm{\'a}n, Francisco",
booktitle = "Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)",
month = sep,
year = "2022",
address = "Orlando, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2022.amta-research.22",
pages = "287--297",
abstract = "We propose a method to adapt large Multilingual Machine Translation models to a low resource language (LRL) that was not included during the pre-training/training phases. We use neuron-ranking analysis to select neurons that are most influential to the high resource language (HRL) and fine-tune only this subset of the deep neural network{'}s neurons. We experiment with three mechanisms to compute such ranking. To allow for the potential difference in writing scripts between the HRL and LRL we utilize an alignment model to substitute HRL elements of the predefined vocab with appropriate LRL ones. Our method improves on both zero-shot and the stronger baseline of directly fine-tuning the model on the low-resource data by 3 BLEU points in X -{\textgreater} E and 1.6 points in E -{\textgreater} X.We also show that as we simulate smaller data amounts, the gap between our method and direct fine-tuning continues to widen.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="abdelghaffar-etal-2022-adapting">
<titleInfo>
<title>Adapting Large Multilingual Machine Translation Models to Unseen Low Resource Languages via Vocabulary Substitution and Neuron Selection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Abdelghaffar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amr</namePart>
<namePart type="family">El Mogy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nada</namePart>
<namePart type="given">Ahmed</namePart>
<namePart type="family">Sharaf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francisco</namePart>
<namePart type="family">Guzmán</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Orlando, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We propose a method to adapt large Multilingual Machine Translation models to a low resource language (LRL) that was not included during the pre-training/training phases. We use neuron-ranking analysis to select neurons that are most influential to the high resource language (HRL) and fine-tune only this subset of the deep neural network’s neurons. We experiment with three mechanisms to compute such ranking. To allow for the potential difference in writing scripts between the HRL and LRL we utilize an alignment model to substitute HRL elements of the predefined vocab with appropriate LRL ones. Our method improves on both zero-shot and the stronger baseline of directly fine-tuning the model on the low-resource data by 3 BLEU points in X -> E and 1.6 points in E -> X. We also show that as we simulate smaller data amounts, the gap between our method and direct fine-tuning continues to widen.</abstract>
<identifier type="citekey">abdelghaffar-etal-2022-adapting</identifier>
<location>
<url>https://aclanthology.org/2022.amta-research.22</url>
</location>
<part>
<date>2022-09</date>
<extent unit="page">
<start>287</start>
<end>297</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Adapting Large Multilingual Machine Translation Models to Unseen Low Resource Languages via Vocabulary Substitution and Neuron Selection
%A Abdelghaffar, Mohamed A.
%A El Mogy, Amr
%A Sharaf, Nada Ahmed
%Y Duh, Kevin
%Y Guzmán, Francisco
%S Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)
%D 2022
%8 September
%I Association for Machine Translation in the Americas
%C Orlando, USA
%F abdelghaffar-etal-2022-adapting
%X We propose a method to adapt large Multilingual Machine Translation models to a low resource language (LRL) that was not included during the pre-training/training phases. We use neuron-ranking analysis to select neurons that are most influential to the high resource language (HRL) and fine-tune only this subset of the deep neural network’s neurons. We experiment with three mechanisms to compute such ranking. To allow for the potential difference in writing scripts between the HRL and LRL we utilize an alignment model to substitute HRL elements of the predefined vocab with appropriate LRL ones. Our method improves on both zero-shot and the stronger baseline of directly fine-tuning the model on the low-resource data by 3 BLEU points in X -> E and 1.6 points in E -> X. We also show that as we simulate smaller data amounts, the gap between our method and direct fine-tuning continues to widen.
%U https://aclanthology.org/2022.amta-research.22
%P 287-297
Markdown (Informal)
[Adapting Large Multilingual Machine Translation Models to Unseen Low Resource Languages via Vocabulary Substitution and Neuron Selection](https://aclanthology.org/2022.amta-research.22) (Abdelghaffar et al., AMTA 2022)
ACL
Mohamed A Abdelghaffar, Amr El Mogy, and Nada Ahmed Sharaf. 2022. Adapting Large Multilingual Machine Translation Models to Unseen Low Resource Languages via Vocabulary Substitution and Neuron Selection. In Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Volume 1: Research Track), pages 287–297, Orlando, USA. Association for Machine Translation in the Americas.
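
As an informal aid to reading the abstract, the sketch below illustrates the two mechanisms it describes: fine-tuning only a ranked subset of neurons, and substituting high-resource-language (HRL) vocabulary entries with low-resource-language (LRL) ones via an alignment mapping. It is a minimal, hypothetical PyTorch example built on a toy model with an assumed activation-based ranking and a made-up alignment table; it is not the authors' implementation.

# Hypothetical sketch of neuron selection + selective fine-tuning + vocab substitution.
# Model, scoring function, and alignment map are illustrative stand-ins only.
import torch
import torch.nn as nn

# Toy stand-in for one feed-forward block of a translation model.
model = nn.Sequential(
    nn.Embedding(1000, 64),   # shared vocabulary embeddings
    nn.Linear(64, 256),       # 256 hidden "neurons"
    nn.ReLU(),
    nn.Linear(256, 64),
)

# --- Neuron ranking: one simple mechanism (mean absolute activation on HRL data) ---
hrl_batch = torch.randint(0, 1000, (32, 16))            # fake HRL token ids
with torch.no_grad():
    hidden = torch.relu(model[1](model[0](hrl_batch)))  # activations of the 256 neurons
scores = hidden.abs().mean(dim=(0, 1))                  # one importance score per neuron
top_k = scores.topk(64).indices                         # keep the 64 most influential neurons

# --- Fine-tune only the selected neurons: zero out gradients of all other rows ---
mask = torch.zeros(256, dtype=torch.bool)
mask[top_k] = True
model[1].weight.register_hook(lambda g: g * mask.unsqueeze(-1).to(g.dtype))  # W: (256, 64)
model[1].bias.register_hook(lambda g: g * mask.to(g.dtype))

# --- Vocabulary substitution: overwrite HRL embedding rows with aligned LRL ids ---
# `alignment` maps an LRL token id to the HRL token id chosen by an alignment model
# (hypothetical mapping, for illustration only).
alignment = {7: 42, 8: 99}
with torch.no_grad():
    for lrl_id, hrl_id in alignment.items():
        model[0].weight[lrl_id] = model[0].weight[hrl_id]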