@inproceedings{coffey-cristia-2024-long,
title = "Long-Form Recordings to Study Children{'}s Language Input and Output in Under-Resourced Contexts",
author = "Coffey, Joseph R. and
Cristia, Alejandrina",
editor = "Mabuya, Rooweither and
Matfunjwa, Muzi and
Setaka, Mmasibidi and
van Zaanen, Menno",
booktitle = "Proceedings of the Fifth Workshop on Resources for African Indigenous Languages @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.rail-1.3",
pages = "20--31",
abstract = "A growing body of research suggests that young children{'}s early speech and language exposure is associated with later language development (including delays and diagnoses), school readiness, and academic performance. The last decade has seen increasing use of child-worn devices to collect long-form audio recordings by educators, economists, and developmental psychologists. The most commonly used system for analyzing this data is LENA, which was trained on North American English child-centered data and generates estimates of children{'}s speech-like vocalization counts, adult word counts, and child-adult turn counts. Recently, cheaper and open-source non-LENA alternatives with multilingual training have been proposed. Both kinds of systems have been employed in under-resourced, sometimes multilingual contexts, including Africa where access to printed or digital linguistic resources may be limited. In this paper, we describe each kind of system (LENA, non-LENA), provide information on audio data collected with them that is available for reuse, review evidence of the accuracy of extant automated analyses, and note potential strengths and shortcomings of their use in African communities.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="coffey-cristia-2024-long">
<titleInfo>
<title>Long-Form Recordings to Study Children’s Language Input and Output in Under-Resourced Contexts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Coffey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alejandrina</namePart>
<namePart type="family">Cristia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Resources for African Indigenous Languages @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rooweither</namePart>
<namePart type="family">Mabuya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muzi</namePart>
<namePart type="family">Matfunjwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mmasibidi</namePart>
<namePart type="family">Setaka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Menno</namePart>
<namePart type="family">van Zaanen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A growing body of research suggests that young children’s early speech and language exposure is associated with later language development (including delays and diagnoses), school readiness, and academic performance. The last decade has seen increasing use of child-worn devices to collect long-form audio recordings by educators, economists, and developmental psychologists. The most commonly used system for analyzing this data is LENA, which was trained on North American English child-centered data and generates estimates of children’s speech-like vocalization counts, adult word counts, and child-adult turn counts. Recently, cheaper and open-source non-LENA alternatives with multilingual training have been proposed. Both kinds of systems have been employed in under-resourced, sometimes multilingual contexts, including Africa where access to printed or digital linguistic resources may be limited. In this paper, we describe each kind of system (LENA, non-LENA), provide information on audio data collected with them that is available for reuse, review evidence of the accuracy of extant automated analyses, and note potential strengths and shortcomings of their use in African communities.</abstract>
<identifier type="citekey">coffey-cristia-2024-long</identifier>
<location>
<url>https://aclanthology.org/2024.rail-1.3</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>20</start>
<end>31</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Long-Form Recordings to Study Children’s Language Input and Output in Under-Resourced Contexts
%A Coffey, Joseph R.
%A Cristia, Alejandrina
%Y Mabuya, Rooweither
%Y Matfunjwa, Muzi
%Y Setaka, Mmasibidi
%Y van Zaanen, Menno
%S Proceedings of the Fifth Workshop on Resources for African Indigenous Languages @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F coffey-cristia-2024-long
%X A growing body of research suggests that young children’s early speech and language exposure is associated with later language development (including delays and diagnoses), school readiness, and academic performance. The last decade has seen increasing use of child-worn devices to collect long-form audio recordings by educators, economists, and developmental psychologists. The most commonly used system for analyzing this data is LENA, which was trained on North American English child-centered data and generates estimates of children’s speech-like vocalization counts, adult word counts, and child-adult turn counts. Recently, cheaper and open-source non-LENA alternatives with multilingual training have been proposed. Both kinds of systems have been employed in under-resourced, sometimes multilingual contexts, including Africa where access to printed or digital linguistic resources may be limited. In this paper, we describe each kind of system (LENA, non-LENA), provide information on audio data collected with them that is available for reuse, review evidence of the accuracy of extant automated analyses, and note potential strengths and shortcomings of their use in African communities.
%U https://aclanthology.org/2024.rail-1.3
%P 20-31
Markdown (Informal)
[Long-Form Recordings to Study Children’s Language Input and Output in Under-Resourced Contexts](https://aclanthology.org/2024.rail-1.3) (Coffey & Cristia, RAIL-WS 2024)
ACL