% Conference paper describing the BK3AT children's speech corpus, published in
% the proceedings of the 3rd Annual Meeting of the Special Interest Group on
% Under-resourced Languages (pages 59 to 65).
% Words whose capitalisation must survive sentence-casing styles (the corpus
% acronym, the proper noun Bangsamoro, the grade band K-3, the venue acronym)
% are braced as whole words rather than letter by letter.
@inproceedings{gonzales-etal-2024-bk3at,
  title     = {{BK3AT}: {Bangsamoro} {K-3} Children{'}s Speech Corpus for Developing Assessment Tools in the {Bangsamoro} Languages},
  author    = {Gonzales, Kiel D. and
               Maranan, Jazzmin R. and
               Santelices, Francis Paolo D. and
               Renovalles, Edsel Jedd M. and
               Macale, Nissan D. and
               Palafox, Nicole Anne A. and
               Mendoza, Jose Marie A.},
  editor    = {Melero, Maite and
               Sakti, Sakriani and
               Soria, Claudia},
  booktitle = {Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ {LREC-COLING} 2024},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.sigul-1.8},
  pages     = {59--65},
  abstract  = {Bangsamoro languages are among the under-resourced languages in the Mindanao region in the Philippines. Moreover, there is no currently publicly available data for children{'}s speech on most of these languages. BK3AT children{'}s speech corpus is a corpus designed for creating speech technologies that could help facilitators and teachers in K-3 education. The corpus consists of 122 hours of children speech data across 10 languages: Bahasa Sug, Chavacano, English, Filipino, Iranun, Maguindanaon, Meranaw, Sinama, Teduray, and Yakan. Preliminary experiments using Wav2Vec-XLSR architecture have been done in fine-tuning the Tagalog and L2 English corpus subsets to develop automatic speech recognition backend for literacy assessment. Results from the experiments show low word error rates (WERs) for small-vocabulary and targeted domains.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single bibliographic record, ID gonzales-etal-2024-bk3at. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gonzales-etal-2024-bk3at">
<!-- Title of the paper itself (the proceedings title is under relatedItem below). -->
<titleInfo>
<title>BK3AT: Bangsamoro K-3 Children’s Speech Corpus for Developing Assessment Tools in the Bangsamoro Languages</title>
</titleInfo>
<!-- Authors: one personal name element per person, in order, each with one or more
     given-name parts, a family-name part, and an "author" role term. Middle initials
     are stored as separate given-name parts without a trailing period. -->
<name type="personal">
<namePart type="given">Kiel</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Gonzales</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jazzmin</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Maranan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="given">Paolo</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Santelices</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edsel</namePart>
<namePart type="given">Jedd</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Renovalles</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nissan</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Macale</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicole</namePart>
<namePart type="given">Anne</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Palafox</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jose</namePart>
<namePart type="given">Marie</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Mendoza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the paper, given as year and month. -->
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host publication: the proceedings volume containing the paper, with its
     three editors, publisher, place, and genre. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maite</namePart>
<namePart type="family">Melero</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Soria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Bangsamoro languages are among the under-resourced languages in the Mindanao region in the Philippines. Moreover, there is no currently publicly available data for children’s speech on most of these languages. BK3AT children’s speech corpus is a corpus designed for creating speech technologies that could help facilitators and teachers in K-3 education. The corpus consists of 122 hours of children speech data across 10 languages: Bahasa Sug, Chavacano, English, Filipino, Iranun, Maguindanaon, Meranaw, Sinama, Teduray, and Yakan. Preliminary experiments using Wav2Vec-XLSR architecture have been done in fine-tuning the Tagalog and L2 English corpus subsets to develop automatic speech recognition backend for literacy assessment. Results from the experiments show low word error rates (WERs) for small-vocabulary and targeted domains.</abstract>
<!-- Citation key; same value as the ID attribute on the mods element. -->
<identifier type="citekey">gonzales-etal-2024-bk3at</identifier>
<location>
<url>https://aclanthology.org/2024.sigul-1.8</url>
</location>
<!-- Position of the paper within the host volume: date plus first and last page. -->
<part>
<date>2024-05</date>
<extent unit="page">
<start>59</start>
<end>65</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BK3AT: Bangsamoro K-3 Children’s Speech Corpus for Developing Assessment Tools in the Bangsamoro Languages
%A Gonzales, Kiel D.
%A Maranan, Jazzmin R.
%A Santelices, Francis Paolo D.
%A Renovalles, Edsel Jedd M.
%A Macale, Nissan D.
%A Palafox, Nicole Anne A.
%A Mendoza, Jose Marie A.
%Y Melero, Maite
%Y Sakti, Sakriani
%Y Soria, Claudia
%S Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F gonzales-etal-2024-bk3at
%X Bangsamoro languages are among the under-resourced languages in the Mindanao region in the Philippines. Moreover, there is no currently publicly available data for children’s speech on most of these languages. BK3AT children’s speech corpus is a corpus designed for creating speech technologies that could help facilitators and teachers in K-3 education. The corpus consists of 122 hours of children speech data across 10 languages: Bahasa Sug, Chavacano, English, Filipino, Iranun, Maguindanaon, Meranaw, Sinama, Teduray, and Yakan. Preliminary experiments using Wav2Vec-XLSR architecture have been done in fine-tuning the Tagalog and L2 English corpus subsets to develop automatic speech recognition backend for literacy assessment. Results from the experiments show low word error rates (WERs) for small-vocabulary and targeted domains.
%U https://aclanthology.org/2024.sigul-1.8
%P 59-65
Markdown (Informal)
[BK3AT: Bangsamoro K-3 Children’s Speech Corpus for Developing Assessment Tools in the Bangsamoro Languages](https://aclanthology.org/2024.sigul-1.8) (Gonzales et al., SIGUL-WS 2024)
ACL