@inproceedings{cooper-stickland-murray-2021-regularising,
title = "Regularising Fisher Information Improves Cross-lingual Generalisation",
author = "Cooper Stickland, Asa and
Murray, Iain",
booktitle = "Proceedings of the 1st Workshop on Multilingual Representation Learning",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.mrl-1.20",
doi = "10.18653/v1/2021.mrl-1.20",
pages = "238--241",
abstract = "Many recent works use {`}consistency regularisation{'} to improve the generalisation of fine-tuned pre-trained models, both multilingual and English-only. These works encourage model outputs to be similar between a perturbed and normal version of the input, usually via penalising the Kullback{--}Leibler (KL) divergence between the probability distribution of the perturbed and normal model. We believe that consistency losses may be implicitly regularizing the loss landscape. In particular, we build on work hypothesising that implicitly or explicitly regularizing trace of the Fisher Information Matrix (FIM), amplifies the implicit bias of SGD to avoid memorization. Our initial results show both empirically and theoretically that consistency losses are related to the FIM, and show that the flat minima implied by a small trace of the FIM improves performance when fine-tuning a multilingual model on additional languages. We aim to confirm these initial results on more datasets, and use our insights to develop better multilingual fine-tuning techniques.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cooper-stickland-murray-2021-regularising">
<titleInfo>
<title>Regularising Fisher Information Improves Cross-lingual Generalisation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Asa</namePart>
<namePart type="family">Cooper Stickland</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iain</namePart>
<namePart type="family">Murray</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Multilingual Representation Learning</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Many recent works use ‘consistency regularisation’ to improve the generalisation of fine-tuned pre-trained models, both multilingual and English-only. These works encourage model outputs to be similar between a perturbed and normal version of the input, usually by penalising the Kullback–Leibler (KL) divergence between the probability distributions of the perturbed and normal models. We believe that consistency losses may be implicitly regularising the loss landscape. In particular, we build on work hypothesising that implicitly or explicitly regularising the trace of the Fisher Information Matrix (FIM) amplifies the implicit bias of SGD to avoid memorisation. Our initial results show both empirically and theoretically that consistency losses are related to the FIM, and that the flat minima implied by a small trace of the FIM improve performance when fine-tuning a multilingual model on additional languages. We aim to confirm these initial results on more datasets, and to use our insights to develop better multilingual fine-tuning techniques.</abstract>
<identifier type="citekey">cooper-stickland-murray-2021-regularising</identifier>
<identifier type="doi">10.18653/v1/2021.mrl-1.20</identifier>
<location>
<url>https://aclanthology.org/2021.mrl-1.20</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>238</start>
<end>241</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Regularising Fisher Information Improves Cross-lingual Generalisation
%A Cooper Stickland, Asa
%A Murray, Iain
%S Proceedings of the 1st Workshop on Multilingual Representation Learning
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F cooper-stickland-murray-2021-regularising
%X Many recent works use ‘consistency regularisation’ to improve the generalisation of fine-tuned pre-trained models, both multilingual and English-only. These works encourage model outputs to be similar between a perturbed and normal version of the input, usually by penalising the Kullback–Leibler (KL) divergence between the probability distributions of the perturbed and normal models. We believe that consistency losses may be implicitly regularising the loss landscape. In particular, we build on work hypothesising that implicitly or explicitly regularising the trace of the Fisher Information Matrix (FIM) amplifies the implicit bias of SGD to avoid memorisation. Our initial results show both empirically and theoretically that consistency losses are related to the FIM, and that the flat minima implied by a small trace of the FIM improve performance when fine-tuning a multilingual model on additional languages. We aim to confirm these initial results on more datasets, and to use our insights to develop better multilingual fine-tuning techniques.
%R 10.18653/v1/2021.mrl-1.20
%U https://aclanthology.org/2021.mrl-1.20
%U https://doi.org/10.18653/v1/2021.mrl-1.20
%P 238-241
Markdown (Informal)
[Regularising Fisher Information Improves Cross-lingual Generalisation](https://aclanthology.org/2021.mrl-1.20) (Cooper Stickland & Murray, MRL 2021)
ACL
Asa Cooper Stickland and Iain Murray. 2021. Regularising Fisher Information Improves Cross-lingual Generalisation. In Proceedings of the 1st Workshop on Multilingual Representation Learning, pages 238–241, Punta Cana, Dominican Republic. Association for Computational Linguistics.
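
The consistency objective described in the abstract can be made concrete with a short sketch. The code below is a minimal illustration, not the authors' implementation: it assumes a PyTorch classifier in which dropout supplies the "perturbation" (so two stochastic forward passes on the same batch play the roles of the normal and perturbed inputs), and the weighting `alpha` is a hypothetical hyperparameter.

```python
# Minimal sketch of a consistency-regularised loss, assuming a PyTorch
# classifier whose dropout makes each forward pass stochastic. The paper's
# exact perturbation and weighting may differ; `alpha` is hypothetical.
import torch
import torch.nn.functional as F

def consistency_loss(model, x, y, alpha=1.0):
    logits_a = model(x)  # "normal" pass
    logits_b = model(x)  # second stochastic pass acts as the "perturbed" version
    task_loss = F.cross_entropy(logits_a, y)

    # Symmetric KL divergence between the two predictive distributions;
    # penalising it pushes the outputs to agree, which the abstract relates
    # to a small trace of the Fisher Information Matrix.
    log_p = F.log_softmax(logits_a, dim=-1)
    log_q = F.log_softmax(logits_b, dim=-1)
    kl = 0.5 * (
        F.kl_div(log_p, log_q, reduction="batchmean", log_target=True)
        + F.kl_div(log_q, log_p, reduction="batchmean", log_target=True)
    )
    return task_loss + alpha * kl
```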