@inproceedings{tanti-etal-2021-language,
title = "On the Language-specificity of Multilingual {BERT} and the Impact of Fine-tuning",
author = "Tanti, Marc and
van der Plas, Lonneke and
Borg, Claudia and
Gatt, Albert",
editor = "Bastings, Jasmijn and
Belinkov, Yonatan and
Dupoux, Emmanuel and
Giulianelli, Mario and
Hupkes, Dieuwke and
Pinter, Yuval and
Sajjad, Hassan",
booktitle = "Proceedings of the Fourth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.blackboxnlp-1.15",
doi = "10.18653/v1/2021.blackboxnlp-1.15",
pages = "214--227",
abstract = "Recent work has shown evidence that the knowledge acquired by multilingual BERT (mBERT) has two components: a language-specific and a language-neutral one. This paper analyses the relationship between them, in the context of fine-tuning on two tasks {--} POS tagging and natural language inference {--} which require the model to bring to bear different degrees of language-specific knowledge. Visualisations reveal that mBERT loses the ability to cluster representations by language after fine-tuning, a result that is supported by evidence from language identification experiments. However, further experiments on {`}unlearning{'} language-specific representations using gradient reversal and iterative adversarial learning are shown not to add further improvement to the language-independent component over and above the effect of fine-tuning. The results presented here suggest that the process of fine-tuning causes a reorganisation of the model{'}s limited representational capacity, enhancing language-independent representations at the expense of language-specific ones.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tanti-etal-2021-language">
<titleInfo>
<title>On the Language-specificity of Multilingual BERT and the Impact of Fine-tuning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marc</namePart>
<namePart type="family">Tanti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lonneke</namePart>
<namePart type="family">van der Plas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Borg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Albert</namePart>
<namePart type="family">Gatt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jasmijn</namePart>
<namePart type="family">Bastings</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yonatan</namePart>
<namePart type="family">Belinkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emmanuel</namePart>
<namePart type="family">Dupoux</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mario</namePart>
<namePart type="family">Giulianelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dieuwke</namePart>
<namePart type="family">Hupkes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuval</namePart>
<namePart type="family">Pinter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hassan</namePart>
<namePart type="family">Sajjad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent work has shown evidence that the knowledge acquired by multilingual BERT (mBERT) has two components: a language-specific and a language-neutral one. This paper analyses the relationship between them, in the context of fine-tuning on two tasks – POS tagging and natural language inference – which require the model to bring to bear different degrees of language-specific knowledge. Visualisations reveal that mBERT loses the ability to cluster representations by language after fine-tuning, a result that is supported by evidence from language identification experiments. However, further experiments on ‘unlearning’ language-specific representations using gradient reversal and iterative adversarial learning are shown not to add further improvement to the language-independent component over and above the effect of fine-tuning. The results presented here suggest that the process of fine-tuning causes a reorganisation of the model’s limited representational capacity, enhancing language-independent representations at the expense of language-specific ones.</abstract>
<identifier type="citekey">tanti-etal-2021-language</identifier>
<identifier type="doi">10.18653/v1/2021.blackboxnlp-1.15</identifier>
<location>
<url>https://aclanthology.org/2021.blackboxnlp-1.15</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>214</start>
<end>227</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On the Language-specificity of Multilingual BERT and the Impact of Fine-tuning
%A Tanti, Marc
%A van der Plas, Lonneke
%A Borg, Claudia
%A Gatt, Albert
%Y Bastings, Jasmijn
%Y Belinkov, Yonatan
%Y Dupoux, Emmanuel
%Y Giulianelli, Mario
%Y Hupkes, Dieuwke
%Y Pinter, Yuval
%Y Sajjad, Hassan
%S Proceedings of the Fourth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F tanti-etal-2021-language
%X Recent work has shown evidence that the knowledge acquired by multilingual BERT (mBERT) has two components: a language-specific and a language-neutral one. This paper analyses the relationship between them, in the context of fine-tuning on two tasks – POS tagging and natural language inference – which require the model to bring to bear different degrees of language-specific knowledge. Visualisations reveal that mBERT loses the ability to cluster representations by language after fine-tuning, a result that is supported by evidence from language identification experiments. However, further experiments on ‘unlearning’ language-specific representations using gradient reversal and iterative adversarial learning are shown not to add further improvement to the language-independent component over and above the effect of fine-tuning. The results presented here suggest that the process of fine-tuning causes a reorganisation of the model’s limited representational capacity, enhancing language-independent representations at the expense of language-specific ones.
%R 10.18653/v1/2021.blackboxnlp-1.15
%U https://aclanthology.org/2021.blackboxnlp-1.15
%U https://doi.org/10.18653/v1/2021.blackboxnlp-1.15
%P 214-227
Markdown (Informal)
[On the Language-specificity of Multilingual BERT and the Impact of Fine-tuning](https://aclanthology.org/2021.blackboxnlp-1.15) (Tanti et al., BlackboxNLP 2021)
ACL