@inproceedings{schuster-hegelich-2022-berts,
title = "From {BERT}{`}s {P}oint of {V}iew: {R}evealing the {P}revailing {C}ontextual {D}ifferences",
author = "Schuster, Carolin M. and
Hegelich, Simon",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2022",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-acl.89",
doi = "10.18653/v1/2022.findings-acl.89",
pages = "1120--1138",
abstract = "Though successfully applied in research and industry large pretrained language models of the BERT family are not yet fully understood. While much research in the field of BERTology has tested whether specific knowledge can be extracted from layer activations, we invert the popular probing design to analyze the prevailing differences and clusters in BERT{'}s high dimensional space. By extracting coarse features from masked token representations and predicting them by probing models with access to only partial information we can apprehend the variation from {`}BERT{'}s point of view{'}. By applying our new methodology to different datasets we show how much the differences can be described by syntax but further how they are to a great extent shaped by the most simple positional information.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="schuster-hegelich-2022-berts">
    <titleInfo>
        <title>From BERT’s Point of View: Revealing the Prevailing Contextual Differences</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Carolin</namePart>
        <namePart type="given">M</namePart>
        <namePart type="family">Schuster</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Simon</namePart>
        <namePart type="family">Hegelich</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2022-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Findings of the Association for Computational Linguistics: ACL 2022</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Smaranda</namePart>
            <namePart type="family">Muresan</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Preslav</namePart>
            <namePart type="family">Nakov</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Aline</namePart>
            <namePart type="family">Villavicencio</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Dublin, Ireland</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Though successfully applied in research and industry large pretrained language models of the BERT family are not yet fully understood. While much research in the field of BERTology has tested whether specific knowledge can be extracted from layer activations, we invert the popular probing design to analyze the prevailing differences and clusters in BERT’s high dimensional space. By extracting coarse features from masked token representations and predicting them by probing models with access to only partial information we can apprehend the variation from ‘BERT’s point of view’. By applying our new methodology to different datasets we show how much the differences can be described by syntax but further how they are to a great extent shaped by the most simple positional information.</abstract>
    <identifier type="citekey">schuster-hegelich-2022-berts</identifier>
    <identifier type="doi">10.18653/v1/2022.findings-acl.89</identifier>
    <location>
        <url>https://aclanthology.org/2022.findings-acl.89</url>
    </location>
    <part>
        <date>2022-05</date>
        <extent unit="page">
            <start>1120</start>
            <end>1138</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T From BERT’s Point of View: Revealing the Prevailing Contextual Differences
%A Schuster, Carolin M.
%A Hegelich, Simon
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Findings of the Association for Computational Linguistics: ACL 2022
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F schuster-hegelich-2022-berts
%X Though successfully applied in research and industry large pretrained language models of the BERT family are not yet fully understood. While much research in the field of BERTology has tested whether specific knowledge can be extracted from layer activations, we invert the popular probing design to analyze the prevailing differences and clusters in BERT’s high dimensional space. By extracting coarse features from masked token representations and predicting them by probing models with access to only partial information we can apprehend the variation from ‘BERT’s point of view’. By applying our new methodology to different datasets we show how much the differences can be described by syntax but further how they are to a great extent shaped by the most simple positional information.
%R 10.18653/v1/2022.findings-acl.89
%U https://aclanthology.org/2022.findings-acl.89
%U https://doi.org/10.18653/v1/2022.findings-acl.89
%P 1120-1138
Markdown (Informal)
[From BERT’s Point of View: Revealing the Prevailing Contextual Differences](https://aclanthology.org/2022.findings-acl.89) (Schuster & Hegelich, Findings 2022)
ACL
Carolin M. Schuster and Simon Hegelich. 2022. From BERT’s Point of View: Revealing the Prevailing Contextual Differences. In Findings of the Association for Computational Linguistics: ACL 2022, pages 1120–1138, Dublin, Ireland. Association for Computational Linguistics.