@inproceedings{fusco-etal-2024-recurrent,
title = "Recurrent Networks Are (Linguistically) Better? An (Ongoing) Experiment on Small-{LM} Training on Child-Directed Speech in {I}talian",
author = "Fusco, Achille and
Barbini, Matilde and
Piccini Bianchessi, Maria Letizia and
Bressan, Veronica and
Neri, Sofia and
Rossi, Sarah and
Sgrizzi, Tommaso and
Chesi, Cristiano",
editor = "Dell'Orletta, Felice and
Lenci, Alessandro and
Montemagni, Simonetta and
Sprugnoli, Rachele",
booktitle = "Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)",
month = dec,
year = "2024",
address = "Pisa, Italy",
publisher = "CEUR Workshop Proceedings",
url = "https://aclanthology.org/2024.clicit-1.46/",
pages = "382--389",
ISBN = "979-12-210-7060-6",
abstract = "We discuss the strategies and results of a small-sized training program based on Italian child-directed speech (less than 3M tokens) for various network architectures. The rationale behind these experiments [1] lies in the attempt to understand the effect of this naturalistic training diet on different models architecture. Preliminary findings lead us to conclude that (a) different tokenization strategies produce only numerical, but not statistically significant, improvements overall, although segmentation aligns more or less with linguistic intuitions; and (b) modified LSTM networks with a single layer and a structurally more controlled cell state perform worse in training (compared to standard one- and two-layered LSTM models) but better on linguistically critical contrasts. This suggests that standard loss/accuracy metrics in autoregressive training procedures are linguistically irrelevant and, more generally, misleading, since the best-trained models qualify as poorer {\textquotedblleft}linguistic theories{\textquotedblright} ([2], pace [3])."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fusco-etal-2024-recurrent">
<titleInfo>
<title>Recurrent Networks Are (Linguistically) Better? An (Ongoing) Experiment on Small-LM Training on Child-Directed Speech in Italian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Achille</namePart>
<namePart type="family">Fusco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matilde</namePart>
<namePart type="family">Barbini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Letizia</namePart>
<namePart type="family">Piccini Bianchessi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronica</namePart>
<namePart type="family">Bressan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sofia</namePart>
<namePart type="family">Neri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Rossi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommaso</namePart>
<namePart type="family">Sgrizzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cristiano</namePart>
<namePart type="family">Chesi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felice</namePart>
<namePart type="family">Dell’Orletta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simonetta</namePart>
<namePart type="family">Montemagni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>CEUR Workshop Proceedings</publisher>
<place>
<placeTerm type="text">Pisa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-12-210-7060-6</identifier>
</relatedItem>
  <abstract>We discuss the strategies and results of a small-scale training program based on Italian child-directed speech (fewer than 3M tokens) for various network architectures. The rationale behind these experiments [1] is to understand the effect of this naturalistic training diet on different model architectures. Preliminary findings lead us to conclude that (a) different tokenization strategies produce only numerical, but not statistically significant, improvements overall, although segmentation aligns more or less with linguistic intuitions; and (b) modified LSTM networks with a single layer and a structurally more controlled cell state perform worse in training (compared to standard one- and two-layered LSTM models) but better on linguistically critical contrasts. This suggests that standard loss/accuracy metrics in autoregressive training procedures are linguistically irrelevant and, more generally, misleading, since the best-trained models qualify as poorer “linguistic theories” ([2], pace [3]).</abstract>
<identifier type="citekey">fusco-etal-2024-recurrent</identifier>
<location>
<url>https://aclanthology.org/2024.clicit-1.46/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>382</start>
<end>389</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Recurrent Networks Are (Linguistically) Better? An (Ongoing) Experiment on Small-LM Training on Child-Directed Speech in Italian
%A Fusco, Achille
%A Barbini, Matilde
%A Piccini Bianchessi, Maria Letizia
%A Bressan, Veronica
%A Neri, Sofia
%A Rossi, Sarah
%A Sgrizzi, Tommaso
%A Chesi, Cristiano
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F fusco-etal-2024-recurrent
%X We discuss the strategies and results of a small-scale training program based on Italian child-directed speech (fewer than 3M tokens) for various network architectures. The rationale behind these experiments [1] is to understand the effect of this naturalistic training diet on different model architectures. Preliminary findings lead us to conclude that (a) different tokenization strategies produce only numerical, but not statistically significant, improvements overall, although segmentation aligns more or less with linguistic intuitions; and (b) modified LSTM networks with a single layer and a structurally more controlled cell state perform worse in training (compared to standard one- and two-layered LSTM models) but better on linguistically critical contrasts. This suggests that standard loss/accuracy metrics in autoregressive training procedures are linguistically irrelevant and, more generally, misleading, since the best-trained models qualify as poorer “linguistic theories” ([2], pace [3]).
%U https://aclanthology.org/2024.clicit-1.46/
%P 382-389
Markdown (Informal)
[Recurrent Networks Are (Linguistically) Better? An (Ongoing) Experiment on Small-LM Training on Child-Directed Speech in Italian](https://aclanthology.org/2024.clicit-1.46/) (Fusco et al., CLiC-it 2024)
ACL
- Achille Fusco, Matilde Barbini, Maria Letizia Piccini Bianchessi, Veronica Bressan, Sofia Neri, Sarah Rossi, Tommaso Sgrizzi, and Cristiano Chesi. 2024. Recurrent Networks Are (Linguistically) Better? An (Ongoing) Experiment on Small-LM Training on Child-Directed Speech in Italian. In Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024), pages 382–389, Pisa, Italy. CEUR Workshop Proceedings.