BibTeX
@inproceedings{rajaee-pilehvar-2021-fine-tuning,
title = "How Does Fine-tuning Affect the Geometry of Embedding Space: A Case Study on Isotropy",
author = "Rajaee, Sara and
Pilehvar, Mohammad Taher",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.findings-emnlp.261",
doi = "10.18653/v1/2021.findings-emnlp.261",
pages = "3042--3049",
abstract = "It is widely accepted that fine-tuning pre-trained language models usually brings about performance improvements in downstream tasks. However, there are limited studies on the reasons behind this effectiveness, particularly from the viewpoint of structural changes in the embedding space. Trying to fill this gap, in this paper, we analyze the extent to which the isotropy of the embedding space changes after fine-tuning. We demonstrate that, even though isotropy is a desirable geometrical property, fine-tuning does not necessarily result in isotropy enhancements. Moreover, local structures in pre-trained contextual word representations (CWRs), such as those encoding token types or frequency, undergo a massive change during fine-tuning. Our experiments show dramatic growth in the number of elongated directions in the embedding space, which, in contrast to pre-trained CWRs, carry the essential linguistic knowledge in the fine-tuned embedding space, making existing isotropy enhancement methods ineffective.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rajaee-pilehvar-2021-fine-tuning">
<titleInfo>
<title>How Does Fine-tuning Affect the Geometry of Embedding Space: A Case Study on Isotropy</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rajaee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="given">Wen-tau</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>It is widely accepted that fine-tuning pre-trained language models usually brings about performance improvements in downstream tasks. However, there are limited studies on the reasons behind this effectiveness, particularly from the viewpoint of structural changes in the embedding space. Trying to fill this gap, in this paper, we analyze the extent to which the isotropy of the embedding space changes after fine-tuning. We demonstrate that, even though isotropy is a desirable geometrical property, fine-tuning does not necessarily result in isotropy enhancements. Moreover, local structures in pre-trained contextual word representations (CWRs), such as those encoding token types or frequency, undergo a massive change during fine-tuning. Our experiments show dramatic growth in the number of elongated directions in the embedding space, which, in contrast to pre-trained CWRs, carry the essential linguistic knowledge in the fine-tuned embedding space, making existing isotropy enhancement methods ineffective.</abstract>
<identifier type="citekey">rajaee-pilehvar-2021-fine-tuning</identifier>
<identifier type="doi">10.18653/v1/2021.findings-emnlp.261</identifier>
<location>
<url>https://aclanthology.org/2021.findings-emnlp.261</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>3042</start>
<end>3049</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T How Does Fine-tuning Affect the Geometry of Embedding Space: A Case Study on Isotropy
%A Rajaee, Sara
%A Pilehvar, Mohammad Taher
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Findings of the Association for Computational Linguistics: EMNLP 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F rajaee-pilehvar-2021-fine-tuning
%X It is widely accepted that fine-tuning pre-trained language models usually brings about performance improvements in downstream tasks. However, there are limited studies on the reasons behind this effectiveness, particularly from the viewpoint of structural changes in the embedding space. Trying to fill this gap, in this paper, we analyze the extent to which the isotropy of the embedding space changes after fine-tuning. We demonstrate that, even though isotropy is a desirable geometrical property, fine-tuning does not necessarily result in isotropy enhancements. Moreover, local structures in pre-trained contextual word representations (CWRs), such as those encoding token types or frequency, undergo a massive change during fine-tuning. Our experiments show dramatic growth in the number of elongated directions in the embedding space, which, in contrast to pre-trained CWRs, carry the essential linguistic knowledge in the fine-tuned embedding space, making existing isotropy enhancement methods ineffective.
%R 10.18653/v1/2021.findings-emnlp.261
%U https://aclanthology.org/2021.findings-emnlp.261
%U https://doi.org/10.18653/v1/2021.findings-emnlp.261
%P 3042-3049
Markdown (Informal)
[How Does Fine-tuning Affect the Geometry of Embedding Space: A Case Study on Isotropy](https://aclanthology.org/2021.findings-emnlp.261) (Rajaee & Pilehvar, Findings 2021)
ACL
Sara Rajaee and Mohammad Taher Pilehvar. 2021. How Does Fine-tuning Affect the Geometry of Embedding Space: A Case Study on Isotropy. In Findings of the Association for Computational Linguistics: EMNLP 2021, pages 3042–3049, Punta Cana, Dominican Republic. Association for Computational Linguistics.
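
The abstract turns on a single geometric quantity, the isotropy of the embedding space. As a companion sketch (not part of the citation record itself), here is a minimal NumPy implementation of one standard measure from this literature, the partition-function ratio of Mu and Viswanath (2018); the function name and the synthetic data in the usage check are illustrative assumptions, not drawn from the paper.

import numpy as np

def isotropy(W: np.ndarray) -> float:
    """Partition-function isotropy I(W) of an embedding matrix W with
    shape (n_tokens, dim): I(W) = min_c Z(c) / max_c Z(c), where
    Z(c) = sum_w exp(w . c) and c ranges over the eigenvectors of W^T W.
    Values near 1 mean a nearly isotropic space; values near 0 mean a
    few elongated directions dominate, the situation the abstract
    reports for fine-tuned embeddings."""
    # Columns of `eigvecs` are unit eigenvectors of W^T W, i.e. the
    # principal directions of the embedding cloud.
    _, eigvecs = np.linalg.eigh(W.T @ W)
    # Z[j] = sum over all token embeddings w of exp(w . c_j),
    # the partition function along direction c_j.
    Z = np.exp(W @ eigvecs).sum(axis=0)
    return float(Z.min() / Z.max())

# Illustrative check: stretching one direction of an otherwise
# isotropic Gaussian cloud sharply lowers the measure.
rng = np.random.default_rng(0)
iso = rng.normal(size=(1000, 64))
aniso = iso * np.concatenate(([10.0], np.ones(63)))
print(isotropy(iso))    # high, close to 1
print(isotropy(aniso))  # orders of magnitude smaller

Restricting the min/max search to the eigenvectors of W^T W is the usual tractable stand-in for optimizing over all unit vectors; it is the approximation this line of work typically inherits from Mu and Viswanath.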