@inproceedings{jawahar-etal-2019-bert,
title = "What Does {BERT} Learn about the Structure of Language?",
author = "Jawahar, Ganesh and
Sagot, Beno{\^\i}t and
Seddah, Djam{\'e}",
editor = "Korhonen, Anna and
Traum, David and
M{\`a}rquez, Llu{\'\i}s",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P19-1356",
doi = "10.18653/v1/P19-1356",
pages = "3651--3657",
abstract = "BERT is a recent language representation model that has surprisingly performed well in diverse language understanding benchmarks. This result indicates the possibility that BERT networks capture structural information about language. In this work, we provide novel support for this claim by performing a series of experiments to unpack the elements of English language structure learned by BERT. Our findings are fourfold. BERT{'}s phrasal representation captures the phrase-level information in the lower layers. The intermediate layers of BERT compose a rich hierarchy of linguistic information, starting with surface features at the bottom, syntactic features in the middle followed by semantic features at the top. BERT requires deeper layers while tracking subject-verb agreement to handle long-term dependency problem. Finally, the compositional scheme underlying BERT mimics classical, tree-like structures.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jawahar-etal-2019-bert">
<titleInfo>
<title>What Does BERT Learn about the Structure of Language?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ganesh</namePart>
<namePart type="family">Jawahar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benoît</namePart>
<namePart type="family">Sagot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Djamé</namePart>
<namePart type="family">Seddah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Traum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Màrquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>BERT is a recent language representation model that has surprisingly performed well in diverse language understanding benchmarks. This result indicates the possibility that BERT networks capture structural information about language. In this work, we provide novel support for this claim by performing a series of experiments to unpack the elements of English language structure learned by BERT. Our findings are fourfold. BERT’s phrasal representation captures the phrase-level information in the lower layers. The intermediate layers of BERT compose a rich hierarchy of linguistic information, starting with surface features at the bottom, syntactic features in the middle followed by semantic features at the top. BERT requires deeper layers while tracking subject-verb agreement to handle long-term dependency problem. Finally, the compositional scheme underlying BERT mimics classical, tree-like structures.</abstract>
<identifier type="citekey">jawahar-etal-2019-bert</identifier>
<identifier type="doi">10.18653/v1/P19-1356</identifier>
<location>
<url>https://aclanthology.org/P19-1356</url>
</location>
<part>
<date>2019-07</date>
<extent unit="page">
<start>3651</start>
<end>3657</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T What Does BERT Learn about the Structure of Language?
%A Jawahar, Ganesh
%A Sagot, Benoît
%A Seddah, Djamé
%Y Korhonen, Anna
%Y Traum, David
%Y Màrquez, Lluís
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F jawahar-etal-2019-bert
%X BERT is a recent language representation model that has surprisingly performed well in diverse language understanding benchmarks. This result indicates the possibility that BERT networks capture structural information about language. In this work, we provide novel support for this claim by performing a series of experiments to unpack the elements of English language structure learned by BERT. Our findings are fourfold. BERT’s phrasal representation captures the phrase-level information in the lower layers. The intermediate layers of BERT compose a rich hierarchy of linguistic information, starting with surface features at the bottom, syntactic features in the middle followed by semantic features at the top. BERT requires deeper layers while tracking subject-verb agreement to handle long-term dependency problem. Finally, the compositional scheme underlying BERT mimics classical, tree-like structures.
%R 10.18653/v1/P19-1356
%U https://aclanthology.org/P19-1356
%U https://doi.org/10.18653/v1/P19-1356
%P 3651-3657
Markdown (Informal)
[What Does BERT Learn about the Structure of Language?](https://aclanthology.org/P19-1356) (Jawahar et al., ACL 2019)
ACL
- Ganesh Jawahar, Benoît Sagot, and Djamé Seddah. 2019. What Does BERT Learn about the Structure of Language?. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pages 3651–3657, Florence, Italy. Association for Computational Linguistics.