@inproceedings{bunzeck-zarriess-2026-child,
title = "Child-directed speech facilitates production, not comprehension, in {B}aby{LM}s",
author = "Bunzeck, Bastian and
Zarrie{\ss}, Sina",
editor = "Bonial, Claire and
Berzak, Yevgeni",
booktitle = "Proceedings of the 30th Conference on Computational Natural Language Learning",
month = jul,
year = "2026",
address = "San Diego, California, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.conll-main.14/",
pages = "227--249",
ISBN = "979-8-89176-410-1",
abstract = "Recent studies suggest that child-directed speech is not conducive to language learning in BabyLMs. However, current evaluations focus predominantly on comprehension and not production, which is central to usage-based theories of language acquisition which argue how CDS facilitates early language use through constructional ``frames'' (frequent lexical patterns with open slots). We introduce a novel generation-based evaluation inspired by such theories in form of a **frame-completion task**, and compare Llama models trained with CDS, the BabyLM corpus, and web-crawl data (FineWeb-edu) on comprehension benchmarks and our novel framework. Our results reveal a clear dissociation between models' comprehension and production capabilities: while FineWeb-trained models excel at minimal pairs, CDS-trained models produce grammatical completions substantially earlier in training and concentrate probability mass on appropriate slot-fillers. These findings show that comprehension benchmarks underestimate what CDS affords to BabyLMs."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bunzeck-zarriess-2026-child">
<titleInfo>
<title>Child-directed speech facilitates production, not comprehension, in BabyLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bastian</namePart>
<namePart type="family">Bunzeck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sina</namePart>
<namePart type="family">Zarrieß</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 30th Conference on Computational Natural Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Claire</namePart>
<namePart type="family">Bonial</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yevgeni</namePart>
<namePart type="family">Berzak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-410-1</identifier>
</relatedItem>
<abstract>Recent studies suggest that child-directed speech is not conducive to language learning in BabyLMs. However, current evaluations focus predominantly on comprehension and not production, which is central to usage-based theories of language acquisition which argue how CDS facilitates early language use through constructional “frames” (frequent lexical patterns with open slots). We introduce a novel generation-based evaluation inspired by such theories in form of a **frame-completion task**, and compare Llama models trained with CDS, the BabyLM corpus, and web-crawl data (FineWeb-edu) on comprehension benchmarks and our novel framework. Our results reveal a clear dissociation between models’ comprehension and production capabilities: while FineWeb-trained models excel at minimal pairs, CDS-trained models produce grammatical completions substantially earlier in training and concentrate probability mass on appropriate slot-fillers. These findings show that comprehension benchmarks underestimate what CDS affords to BabyLMs.</abstract>
<identifier type="citekey">bunzeck-zarriess-2026-child</identifier>
<location>
<url>https://aclanthology.org/2026.conll-main.14/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>227</start>
<end>249</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Child-directed speech facilitates production, not comprehension, in BabyLMs
%A Bunzeck, Bastian
%A Zarrieß, Sina
%Y Bonial, Claire
%Y Berzak, Yevgeni
%S Proceedings of the 30th Conference on Computational Natural Language Learning
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-410-1
%F bunzeck-zarriess-2026-child
%X Recent studies suggest that child-directed speech is not conducive to language learning in BabyLMs. However, current evaluations focus predominantly on comprehension and not production, which is central to usage-based theories of language acquisition which argue how CDS facilitates early language use through constructional “frames” (frequent lexical patterns with open slots). We introduce a novel generation-based evaluation inspired by such theories in form of a **frame-completion task**, and compare Llama models trained with CDS, the BabyLM corpus, and web-crawl data (FineWeb-edu) on comprehension benchmarks and our novel framework. Our results reveal a clear dissociation between models’ comprehension and production capabilities: while FineWeb-trained models excel at minimal pairs, CDS-trained models produce grammatical completions substantially earlier in training and concentrate probability mass on appropriate slot-fillers. These findings show that comprehension benchmarks underestimate what CDS affords to BabyLMs.
%U https://aclanthology.org/2026.conll-main.14/
%P 227-249
Markdown (Informal)
[Child-directed speech facilitates production, not comprehension, in BabyLMs](https://aclanthology.org/2026.conll-main.14/) (Bunzeck & Zarrieß, CoNLL 2026)
ACL