@inproceedings{alex-r-atrio-popescu-belis-2022-interaction,
title = "On the Interaction of Regularization Factors in Low-resource Neural Machine Translation",
author = "Atrio, {\`A}lex R. and
Popescu-Belis, Andrei",
editor = {Moniz, Helena and
Macken, Lieve and
Rufener, Andrew and
Barrault, Lo{\"\i}c and
Costa-juss{\`a}, Marta R. and
Declercq, Christophe and
Koponen, Maarit and
Kemp, Ellie and
Pilos, Spyridon and
Forcada, Mikel L. and
Scarton, Carolina and
Van den Bogaert, Joachim and
Daems, Joke and
Tezcan, Arda and
Vanroy, Bram and
Fonteyne, Margot},
booktitle = "Proceedings of the 23rd Annual Conference of the European Association for Machine Translation",
month = jun,
year = "2022",
address = "Ghent, Belgium",
publisher = "European Association for Machine Translation",
url = "https://aclanthology.org/2022.eamt-1.14",
pages = "111--120",
abstract = "We explore the roles and interactions of the hyper-parameters governing regularization, and propose a range of values applicable to low-resource neural machine translation. We demonstrate that default or recommended values for high-resource settings are not optimal for low-resource ones, and that more aggressive regularization is needed when resources are scarce, in proportion to their scarcity. We explain our observations by the generalization abilities of sharp vs. flat basins in the loss landscape of a neural network. Results for four regularization factors corroborate our claim: batch size, learning rate, dropout rate, and gradient clipping. Moreover, we show that optimal results are obtained when using several of these factors, and that our findings generalize across datasets of different sizes and languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alex-r-atrio-popescu-belis-2022-interaction">
<titleInfo>
<title>On the Interaction of Regularization Factors in Low-resource Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Àlex</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Atrio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrei</namePart>
<namePart type="family">Popescu-Belis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Annual Conference of the European Association for Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Moniz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lieve</namePart>
<namePart type="family">Macken</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Rufener</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Loïc</namePart>
<namePart type="family">Barrault</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Costa-jussà</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christophe</namePart>
<namePart type="family">Declercq</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maarit</namePart>
<namePart type="family">Koponen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ellie</namePart>
<namePart type="family">Kemp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Spyridon</namePart>
<namePart type="family">Pilos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikel</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Forcada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolina</namePart>
<namePart type="family">Scarton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joachim</namePart>
<namePart type="family">Van den Bogaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joke</namePart>
<namePart type="family">Daems</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arda</namePart>
<namePart type="family">Tezcan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bram</namePart>
<namePart type="family">Vanroy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Margot</namePart>
<namePart type="family">Fonteyne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Association for Machine Translation</publisher>
<place>
<placeTerm type="text">Ghent, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We explore the roles and interactions of the hyper-parameters governing regularization, and propose a range of values applicable to low-resource neural machine translation. We demonstrate that default or recommended values for high-resource settings are not optimal for low-resource ones, and that more aggressive regularization is needed when resources are scarce, in proportion to their scarcity. We explain our observations by the generalization abilities of sharp vs. flat basins in the loss landscape of a neural network. Results for four regularization factors corroborate our claim: batch size, learning rate, dropout rate, and gradient clipping. Moreover, we show that optimal results are obtained when using several of these factors, and that our findings generalize across datasets of different sizes and languages.</abstract>
<identifier type="citekey">alex-r-atrio-popescu-belis-2022-interaction</identifier>
<location>
<url>https://aclanthology.org/2022.eamt-1.14</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>111</start>
<end>120</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On the Interaction of Regularization Factors in Low-resource Neural Machine Translation
%A Atrio, Àlex R.
%A Popescu-Belis, Andrei
%Y Moniz, Helena
%Y Macken, Lieve
%Y Rufener, Andrew
%Y Barrault, Loïc
%Y Costa-jussà, Marta R.
%Y Declercq, Christophe
%Y Koponen, Maarit
%Y Kemp, Ellie
%Y Pilos, Spyridon
%Y Forcada, Mikel L.
%Y Scarton, Carolina
%Y Van den Bogaert, Joachim
%Y Daems, Joke
%Y Tezcan, Arda
%Y Vanroy, Bram
%Y Fonteyne, Margot
%S Proceedings of the 23rd Annual Conference of the European Association for Machine Translation
%D 2022
%8 June
%I European Association for Machine Translation
%C Ghent, Belgium
%F alex-r-atrio-popescu-belis-2022-interaction
%X We explore the roles and interactions of the hyper-parameters governing regularization, and propose a range of values applicable to low-resource neural machine translation. We demonstrate that default or recommended values for high-resource settings are not optimal for low-resource ones, and that more aggressive regularization is needed when resources are scarce, in proportion to their scarcity. We explain our observations by the generalization abilities of sharp vs. flat basins in the loss landscape of a neural network. Results for four regularization factors corroborate our claim: batch size, learning rate, dropout rate, and gradient clipping. Moreover, we show that optimal results are obtained when using several of these factors, and that our findings generalize across datasets of different sizes and languages.
%U https://aclanthology.org/2022.eamt-1.14
%P 111-120
Markdown (Informal)
[On the Interaction of Regularization Factors in Low-resource Neural Machine Translation](https://aclanthology.org/2022.eamt-1.14) (Atrio & Popescu-Belis, EAMT 2022)
ACL