@inproceedings{hernandez-andreas-2021-low,
title = "The Low-Dimensional Linear Geometry of Contextualized Word Representations",
author = "Hernandez, Evan and
Andreas, Jacob",
editor = "Bisazza, Arianna and
Abend, Omri",
booktitle = "Proceedings of the 25th Conference on Computational Natural Language Learning",
month = nov,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.conll-1.7/",
doi = "10.18653/v1/2021.conll-1.7",
pages = "82--93",
abstract = "Black-box probing models can reliably extract linguistic features like tense, number, and syntactic role from pretrained word representations. However, the manner in which these features are encoded in representations remains poorly understood. We present a systematic study of the linear geometry of contextualized word representations in ELMo and BERT. We show that a variety of linguistic features (including structured dependency relationships) are encoded in low-dimensional subspaces. We then refine this geometric picture, showing that there are hierarchical relations between the subspaces encoding general linguistic categories and more specific ones, and that low-dimensional feature encodings are distributed rather than aligned to individual neurons. Finally, we demonstrate that these linear subspaces are causally related to model behavior, and can be used to perform fine-grained manipulation of BERT's output distribution."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hernandez-andreas-2021-low">
    <titleInfo>
      <title>The Low-Dimensional Linear Geometry of Contextualized Word Representations</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Evan</namePart>
      <namePart type="family">Hernandez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jacob</namePart>
      <namePart type="family">Andreas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 25th Conference on Computational Natural Language Learning</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Arianna</namePart>
        <namePart type="family">Bisazza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Omri</namePart>
        <namePart type="family">Abend</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Black-box probing models can reliably extract linguistic features like tense, number, and syntactic role from pretrained word representations. However, the manner in which these features are encoded in representations remains poorly understood. We present a systematic study of the linear geometry of contextualized word representations in ELMo and BERT. We show that a variety of linguistic features (including structured dependency relationships) are encoded in low-dimensional subspaces. We then refine this geometric picture, showing that there are hierarchical relations between the subspaces encoding general linguistic categories and more specific ones, and that low-dimensional feature encodings are distributed rather than aligned to individual neurons. Finally, we demonstrate that these linear subspaces are causally related to model behavior, and can be used to perform fine-grained manipulation of BERT's output distribution.</abstract>
    <identifier type="citekey">hernandez-andreas-2021-low</identifier>
    <identifier type="doi">10.18653/v1/2021.conll-1.7</identifier>
    <location>
      <url>https://aclanthology.org/2021.conll-1.7/</url>
    </location>
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>82</start>
        <end>93</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The Low-Dimensional Linear Geometry of Contextualized Word Representations
%A Hernandez, Evan
%A Andreas, Jacob
%Y Bisazza, Arianna
%Y Abend, Omri
%S Proceedings of the 25th Conference on Computational Natural Language Learning
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online
%F hernandez-andreas-2021-low
%X Black-box probing models can reliably extract linguistic features like tense, number, and syntactic role from pretrained word representations. However, the manner in which these features are encoded in representations remains poorly understood. We present a systematic study of the linear geometry of contextualized word representations in ELMo and BERT. We show that a variety of linguistic features (including structured dependency relationships) are encoded in low-dimensional subspaces. We then refine this geometric picture, showing that there are hierarchical relations between the subspaces encoding general linguistic categories and more specific ones, and that low-dimensional feature encodings are distributed rather than aligned to individual neurons. Finally, we demonstrate that these linear subspaces are causally related to model behavior, and can be used to perform fine-grained manipulation of BERT's output distribution.
%R 10.18653/v1/2021.conll-1.7
%U https://aclanthology.org/2021.conll-1.7/
%U https://doi.org/10.18653/v1/2021.conll-1.7
%P 82-93
Markdown (Informal)
[The Low-Dimensional Linear Geometry of Contextualized Word Representations](https://aclanthology.org/2021.conll-1.7/) (Hernandez & Andreas, CoNLL 2021)
ACL
Evan Hernandez and Jacob Andreas. 2021. [The Low-Dimensional Linear Geometry of Contextualized Word Representations](https://aclanthology.org/2021.conll-1.7/). In *Proceedings of the 25th Conference on Computational Natural Language Learning*, pages 82–93, Online. Association for Computational Linguistics.