@inproceedings{choy-etal-2026-fine,
title = "Fine-tuned speech representations track spoken language convergence to adult models in infants and children who are deaf/hard-of-hearing",
author = "Choy, Landon and
Khan, Ali Sartaz and
Patrizi, Sonia and
Ye, Daisy S. and
Gross, Julianna and
Cychosz, Margaret",
editor = "Ma, Martin Ziqiao and
Liu, Emmy and
Liu, Jing and
Chang, Tyler A. and
Fourtassi, Abdellah and
Warstadt, Alex and
Hahn, Michael and
Sun, Weiwei and
Shi, Freda",
booktitle = "Proceedings of the 1st Workshop on Computational Developmental Linguistics ({CDL})",
month = jul,
year = "2026",
address = "Grand Hyatt Manchester San Diego, 1 Market Pl, San Diego, CA 92101",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.cdl-1.8/",
pages = "27--36",
ISBN = "979-8-89176-428-6",
abstract = "Language development is characterized by a gradual convergence of children{'}s speech toward adult patterns. Measuring this process has traditionally required detailed transcription and language-specific expertise, limiting scalability across languages and populations. Here, we use fine-tuned speech embeddings to capture this convergence directly from the acoustic signal in longform, child-centered recordings, taken as children go about their daily lives. Using BabyHuBERT, we extracted embeddings from vocalizations of children who are deaf/hard-of-hearing and their female adult caregivers ($>$925 hrs. observation). Embedding distance between children and caregivers decreased with hearing age, controlling for pitch, indicating, as expected, that children{'}s speech patterns converge to caregivers over development. This single distance metric likewise related to multiple standardized measures of speech and language, from infancy through preschoolhood. These results suggest a path toward scalable, language-neutral assessment of spoken language development from children{'}s everyday lives."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="choy-etal-2026-fine">
<titleInfo>
<title>Fine-tuned speech representations track spoken language convergence to adult models in infants and children who are deaf/hard-of-hearing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Landon</namePart>
<namePart type="family">Choy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="given">Sartaz</namePart>
<namePart type="family">Khan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sonia</namePart>
<namePart type="family">Patrizi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daisy</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Ye</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julianna</namePart>
<namePart type="family">Gross</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Margaret</namePart>
<namePart type="family">Cychosz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Computational Developmental Linguistics (CDL)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="given">Ziqiao</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emmy</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tyler</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdellah</namePart>
<namePart type="family">Fourtassi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Warstadt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Hahn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weiwei</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Freda</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Grand Hyatt Manchester San Diego, 1 Market Pl, San Diego, CA 92101</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-428-6</identifier>
</relatedItem>
<abstract>Language development is characterized by a gradual convergence of children’s speech toward adult patterns. Measuring this process has traditionally required detailed transcription and language-specific expertise, limiting scalability across languages and populations. Here, we use fine-tuned speech embeddings to capture this convergence directly from the acoustic signal in longform, child-centered recordings, taken as children go about their daily lives. Using BabyHuBERT, we extracted embeddings from vocalizations of children who are deaf/hard-of-hearing and their female adult caregivers (>925 hrs. observation). Embedding distance between children and caregivers decreased with hearing age, controlling for pitch, indicating, as expected, that children’s speech patterns converge to caregivers over development. This single distance metric likewise related to multiple standardized measures of speech and language, from infancy through preschoolhood. These results suggest a path toward scalable, language-neutral assessment of spoken language development from children’s everyday lives.</abstract>
<identifier type="citekey">choy-etal-2026-fine</identifier>
<location>
<url>https://aclanthology.org/2026.cdl-1.8/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>27</start>
<end>36</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fine-tuned speech representations track spoken language convergence to adult models in infants and children who are deaf/hard-of-hearing
%A Choy, Landon
%A Khan, Ali Sartaz
%A Patrizi, Sonia
%A Ye, Daisy S.
%A Gross, Julianna
%A Cychosz, Margaret
%Y Ma, Martin Ziqiao
%Y Liu, Emmy
%Y Liu, Jing
%Y Chang, Tyler A.
%Y Fourtassi, Abdellah
%Y Warstadt, Alex
%Y Hahn, Michael
%Y Sun, Weiwei
%Y Shi, Freda
%S Proceedings of the 1st Workshop on Computational Developmental Linguistics (CDL)
%D 2026
%8 July
%I Association for Computational Linguistics
%C Grand Hyatt Manchester San Diego, 1 Market Pl, San Diego, CA 92101
%@ 979-8-89176-428-6
%F choy-etal-2026-fine
%X Language development is characterized by a gradual convergence of children’s speech toward adult patterns. Measuring this process has traditionally required detailed transcription and language-specific expertise, limiting scalability across languages and populations. Here, we use fine-tuned speech embeddings to capture this convergence directly from the acoustic signal in longform, child-centered recordings, taken as children go about their daily lives. Using BabyHuBERT, we extracted embeddings from vocalizations of children who are deaf/hard-of-hearing and their female adult caregivers (>925 hrs. observation). Embedding distance between children and caregivers decreased with hearing age, controlling for pitch, indicating, as expected, that children’s speech patterns converge to caregivers over development. This single distance metric likewise related to multiple standardized measures of speech and language, from infancy through preschoolhood. These results suggest a path toward scalable, language-neutral assessment of spoken language development from children’s everyday lives.
%U https://aclanthology.org/2026.cdl-1.8/
%P 27-36
Markdown (Informal)
[Fine-tuned speech representations track spoken language convergence to adult models in infants and children who are deaf/hard-of-hearing](https://aclanthology.org/2026.cdl-1.8/) (Choy et al., CDL 2026)
ACL
- Landon Choy, Ali Sartaz Khan, Sonia Patrizi, Daisy S. Ye, Julianna Gross, and Margaret Cychosz. 2026. Fine-tuned speech representations track spoken language convergence to adult models in infants and children who are deaf/hard-of-hearing. In Proceedings of the 1st Workshop on Computational Developmental Linguistics (CDL), pages 27–36, Grand Hyatt Manchester San Diego, 1 Market Pl, San Diego, CA 92101. Association for Computational Linguistics.