@inproceedings{chesi-etal-2024-different,
title = "Different Ways to Forget: Linguistic Gates in Recurrent Neural Networks",
author = "Chesi, Cristiano and
Bressan, Veronica and
Barbini, Matilde and
Fusco, Achille and
Bianchessi, Maria Letizia Piccini and
Neri, Sofia and
Rossi, Sarah and
Sgrizzi, Tommaso",
editor = "Hu, Michael Y. and
Mueller, Aaron and
Ross, Candace and
Williams, Adina and
Linzen, Tal and
Zhuang, Chengxu and
Choshen, Leshem and
Cotterell, Ryan and
Warstadt, Alex and
Wilcox, Ethan Gotlieb",
booktitle = "The 2nd BabyLM Challenge at the 28th Conference on Computational Natural Language Learning",
month = nov,
year = "2024",
address = "Miami, FL, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.conll-babylm.9/",
pages = "106--117",
abstract = "This work explores alternative gating systems in simple Recurrent Neural Networks (RNNs) that induce linguistically motivated biases during training, ultimately affecting models' performance on the BLiMP task. We focus exclusively on the BabyLM 10M training corpus (Strict-Small Track). Our experiments reveal that: (i) standard RNN variants{---}LSTMs and GRUs{---}are insufficient for properly learning the relevant set of linguistic constraints; (ii) the quality or size of the training corpus has little impact on these networks, as demonstrated by the comparable performance of LSTMs trained exclusively on the child-directed speech portion of the corpus; (iii) increasing the size of the embedding and hidden layers does not significantly improve performance. In contrast, specifically gated RNNs (eMG-RNNs), inspired by certain Minimalist Grammar intuitions, exhibit advantages in both training loss and BLiMP accuracy."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="chesi-etal-2024-different">
    <titleInfo>
      <title>Different Ways to Forget: Linguistic Gates in Recurrent Neural Networks</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Cristiano</namePart>
      <namePart type="family">Chesi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Veronica</namePart>
      <namePart type="family">Bressan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Matilde</namePart>
      <namePart type="family">Barbini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Achille</namePart>
      <namePart type="family">Fusco</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Maria</namePart>
      <namePart type="given">Letizia</namePart>
      <namePart type="given">Piccini</namePart>
      <namePart type="family">Bianchessi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sofia</namePart>
      <namePart type="family">Neri</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sarah</namePart>
      <namePart type="family">Rossi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tommaso</namePart>
      <namePart type="family">Sgrizzi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>The 2nd BabyLM Challenge at the 28th Conference on Computational Natural Language Learning</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Michael</namePart>
        <namePart type="given">Y</namePart>
        <namePart type="family">Hu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aaron</namePart>
        <namePart type="family">Mueller</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Candace</namePart>
        <namePart type="family">Ross</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Adina</namePart>
        <namePart type="family">Williams</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tal</namePart>
        <namePart type="family">Linzen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chengxu</namePart>
        <namePart type="family">Zhuang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Leshem</namePart>
        <namePart type="family">Choshen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ryan</namePart>
        <namePart type="family">Cotterell</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alex</namePart>
        <namePart type="family">Warstadt</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ethan</namePart>
        <namePart type="given">Gotlieb</namePart>
        <namePart type="family">Wilcox</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, FL, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This work explores alternative gating systems in simple Recurrent Neural Networks (RNNs) that induce linguistically motivated biases during training, ultimately affecting models’ performance on the BLiMP task. We focus exclusively on the BabyLM 10M training corpus (Strict-Small Track). Our experiments reveal that: (i) standard RNN variants—LSTMs and GRUs—are insufficient for properly learning the relevant set of linguistic constraints; (ii) the quality or size of the training corpus has little impact on these networks, as demonstrated by the comparable performance of LSTMs trained exclusively on the child-directed speech portion of the corpus; (iii) increasing the size of the embedding and hidden layers does not significantly improve performance. In contrast, specifically gated RNNs (eMG-RNNs), inspired by certain Minimalist Grammar intuitions, exhibit advantages in both training loss and BLiMP accuracy.</abstract>
    <identifier type="citekey">chesi-etal-2024-different</identifier>
    <location>
      <url>https://aclanthology.org/2024.conll-babylm.9/</url>
    </location>
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>106</start>
        <end>117</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T Different Ways to Forget: Linguistic Gates in Recurrent Neural Networks
%A Chesi, Cristiano
%A Bressan, Veronica
%A Barbini, Matilde
%A Fusco, Achille
%A Bianchessi, Maria Letizia Piccini
%A Neri, Sofia
%A Rossi, Sarah
%A Sgrizzi, Tommaso
%Y Hu, Michael Y.
%Y Mueller, Aaron
%Y Ross, Candace
%Y Williams, Adina
%Y Linzen, Tal
%Y Zhuang, Chengxu
%Y Choshen, Leshem
%Y Cotterell, Ryan
%Y Warstadt, Alex
%Y Wilcox, Ethan Gotlieb
%S The 2nd BabyLM Challenge at the 28th Conference on Computational Natural Language Learning
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, FL, USA
%F chesi-etal-2024-different
%X This work explores alternative gating systems in simple Recurrent Neural Networks (RNNs) that induce linguistically motivated biases during training, ultimately affecting models’ performance on the BLiMP task. We focus exclusively on the BabyLM 10M training corpus (Strict-Small Track). Our experiments reveal that: (i) standard RNN variants—LSTMs and GRUs—are insufficient for properly learning the relevant set of linguistic constraints; (ii) the quality or size of the training corpus has little impact on these networks, as demonstrated by the comparable performance of LSTMs trained exclusively on the child-directed speech portion of the corpus; (iii) increasing the size of the embedding and hidden layers does not significantly improve performance. In contrast, specifically gated RNNs (eMG-RNNs), inspired by certain Minimalist Grammar intuitions, exhibit advantages in both training loss and BLiMP accuracy.
%U https://aclanthology.org/2024.conll-babylm.9/
%P 106-117

Markdown (Informal)

[Different Ways to Forget: Linguistic Gates in Recurrent Neural Networks](https://aclanthology.org/2024.conll-babylm.9/) (Chesi et al., CoNLL-BabyLM 2024)

ACL

Cristiano Chesi, Veronica Bressan, Matilde Barbini, Achille Fusco, Maria Letizia Piccini Bianchessi, Sofia Neri, Sarah Rossi, and Tommaso Sgrizzi. 2024. Different Ways to Forget: Linguistic Gates in Recurrent Neural Networks. In The 2nd BabyLM Challenge at the 28th Conference on Computational Natural Language Learning, pages 106–117, Miami, FL, USA. Association for Computational Linguistics.