@inproceedings{huebner-etal-2021-babyberta,
title = "{B}aby{BERT}a: Learning More Grammar With Small-Scale Child-Directed Language",
author = "Huebner, Philip A. and
Sulem, Elior and
Fisher, Cynthia and
Roth, Dan",
editor = "Bisazza, Arianna and
Abend, Omri",
booktitle = "Proceedings of the 25th Conference on Computational Natural Language Learning",
month = nov,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.conll-1.49/",
doi = "10.18653/v1/2021.conll-1.49",
pages = "624--646",
abstract = "Transformer-based language models have taken the NLP world by storm. However, their potential for addressing important questions in language acquisition research has been largely ignored. In this work, we examined the grammatical knowledge of RoBERTa (Liu et al., 2019) when trained on a 5M word corpus of language acquisition data to simulate the input available to children between the ages 1 and 6. Using the behavioral probing paradigm, we found that a smaller version of RoBERTa-base that never predicts unmasked tokens, which we term BabyBERTa, acquires grammatical knowledge comparable to that of pre-trained RoBERTa-base - and does so with approximately 15X fewer parameters and 6,000X fewer words. We discuss implications for building more efficient models and the learnability of grammar from input available to children. Lastly, to support research on this front, we release our novel grammar test suite that is compatible with the small vocabulary of child-directed input."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="huebner-etal-2021-babyberta">
<titleInfo>
<title>BabyBERTa: Learning More Grammar With Small-Scale Child-Directed Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Philip</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Huebner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elior</namePart>
<namePart type="family">Sulem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cynthia</namePart>
<namePart type="family">Fisher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 25th Conference on Computational Natural Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Arianna</namePart>
<namePart type="family">Bisazza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Omri</namePart>
<namePart type="family">Abend</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Transformer-based language models have taken the NLP world by storm. However, their potential for addressing important questions in language acquisition research has been largely ignored. In this work, we examined the grammatical knowledge of RoBERTa (Liu et al., 2019) when trained on a 5M word corpus of language acquisition data to simulate the input available to children between the ages 1 and 6. Using the behavioral probing paradigm, we found that a smaller version of RoBERTa-base that never predicts unmasked tokens, which we term BabyBERTa, acquires grammatical knowledge comparable to that of pre-trained RoBERTa-base - and does so with approximately 15X fewer parameters and 6,000X fewer words. We discuss implications for building more efficient models and the learnability of grammar from input available to children. Lastly, to support research on this front, we release our novel grammar test suite that is compatible with the small vocabulary of child-directed input.</abstract>
<identifier type="citekey">huebner-etal-2021-babyberta</identifier>
<identifier type="doi">10.18653/v1/2021.conll-1.49</identifier>
<location>
<url>https://aclanthology.org/2021.conll-1.49/</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>624</start>
<end>646</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BabyBERTa: Learning More Grammar With Small-Scale Child-Directed Language
%A Huebner, Philip A.
%A Sulem, Elior
%A Fisher, Cynthia
%A Roth, Dan
%Y Bisazza, Arianna
%Y Abend, Omri
%S Proceedings of the 25th Conference on Computational Natural Language Learning
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online
%F huebner-etal-2021-babyberta
%X Transformer-based language models have taken the NLP world by storm. However, their potential for addressing important questions in language acquisition research has been largely ignored. In this work, we examined the grammatical knowledge of RoBERTa (Liu et al., 2019) when trained on a 5M word corpus of language acquisition data to simulate the input available to children between the ages 1 and 6. Using the behavioral probing paradigm, we found that a smaller version of RoBERTa-base that never predicts unmasked tokens, which we term BabyBERTa, acquires grammatical knowledge comparable to that of pre-trained RoBERTa-base - and does so with approximately 15X fewer parameters and 6,000X fewer words. We discuss implications for building more efficient models and the learnability of grammar from input available to children. Lastly, to support research on this front, we release our novel grammar test suite that is compatible with the small vocabulary of child-directed input.
%R 10.18653/v1/2021.conll-1.49
%U https://aclanthology.org/2021.conll-1.49/
%U https://doi.org/10.18653/v1/2021.conll-1.49
%P 624-646
Markdown (Informal)
[BabyBERTa: Learning More Grammar With Small-Scale Child-Directed Language](https://aclanthology.org/2021.conll-1.49/) (Huebner et al., CoNLL 2021)
ACL
Philip A. Huebner, Elior Sulem, Cynthia Fisher, and Dan Roth. 2021. [BabyBERTa: Learning More Grammar With Small-Scale Child-Directed Language](https://aclanthology.org/2021.conll-1.49/). In *Proceedings of the 25th Conference on Computational Natural Language Learning*, pages 624–646, Online. Association for Computational Linguistics.