@inproceedings{coto-solano-etal-2024-multilingual,
title = "Multilingual Models for {ASR} in Chibchan Languages",
author = "Coto-Solano, Rolando and
Kim, Tai Wan and
Jones, Alexander and
Lo{\'a}iciga, Sharid",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.naacl-long.471",
doi = "10.18653/v1/2024.naacl-long.471",
pages = "8521--8535",
abstract = "We present experiments on Automatic Speech Recognition (ASR) for Bribri and Cab{\'e}car, two languages from the Chibchan family. We fine-tune four ASR algorithms (Wav2Vec2, Whisper, MMS {\&} WavLM) to create monolingual models, with the Wav2Vec2 model demonstrating the best performance. We then proceed to use Wav2Vec2 for (1) experiments on training joint and transfer learning models for both languages, and (2) an analysis of the errors, with a focus on the transcription of tone. Results show effective transfer learning for both Bribri and Cab{\'e}car, but especially for Bribri. A post-processing spell checking step further reduced character and word error rates. As for the errors, tone is where the Bribri models make the most errors, whereas the simpler tonal system of Cab{\'e}car is better transcribed by the model. Our work contributes to developing better ASR technology, an important tool that could facilitate transcription, one of the major bottlenecks in language documentation efforts. Our work also assesses how existing pre-trained models and algorithms perform for genuine extremely low resource-languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="coto-solano-etal-2024-multilingual">
    <titleInfo>
      <title>Multilingual Models for ASR in Chibchan Languages</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Rolando</namePart>
      <namePart type="family">Coto-Solano</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tai</namePart>
      <namePart type="given">Wan</namePart>
      <namePart type="family">Kim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alexander</namePart>
      <namePart type="family">Jones</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sharid</namePart>
      <namePart type="family">Loáiciga</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Kevin</namePart>
        <namePart type="family">Duh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Helena</namePart>
        <namePart type="family">Gomez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Bethard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Mexico City, Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present experiments on Automatic Speech Recognition (ASR) for Bribri and Cabécar, two languages from the Chibchan family. We fine-tune four ASR algorithms (Wav2Vec2, Whisper, MMS &amp; WavLM) to create monolingual models, with the Wav2Vec2 model demonstrating the best performance. We then proceed to use Wav2Vec2 for (1) experiments on training joint and transfer learning models for both languages, and (2) an analysis of the errors, with a focus on the transcription of tone. Results show effective transfer learning for both Bribri and Cabécar, but especially for Bribri. A post-processing spell-checking step further reduced character and word error rates. As for the errors, tone is where the Bribri models make the most errors, whereas the simpler tonal system of Cabécar is better transcribed by the model. Our work contributes to developing better ASR technology, an important tool that could facilitate transcription, one of the major bottlenecks in language documentation efforts. Our work also assesses how existing pre-trained models and algorithms perform for genuine extremely low-resource languages.</abstract>
<identifier type="citekey">coto-solano-etal-2024-multilingual</identifier>
<identifier type="doi">10.18653/v1/2024.naacl-long.471</identifier>
<location>
<url>https://aclanthology.org/2024.naacl-long.471</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>8521</start>
<end>8535</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multilingual Models for ASR in Chibchan Languages
%A Coto-Solano, Rolando
%A Kim, Tai Wan
%A Jones, Alexander
%A Loáiciga, Sharid
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F coto-solano-etal-2024-multilingual
%X We present experiments on Automatic Speech Recognition (ASR) for Bribri and Cabécar, two languages from the Chibchan family. We fine-tune four ASR algorithms (Wav2Vec2, Whisper, MMS & WavLM) to create monolingual models, with the Wav2Vec2 model demonstrating the best performance. We then proceed to use Wav2Vec2 for (1) experiments on training joint and transfer learning models for both languages, and (2) an analysis of the errors, with a focus on the transcription of tone. Results show effective transfer learning for both Bribri and Cabécar, but especially for Bribri. A post-processing spell-checking step further reduced character and word error rates. As for the errors, tone is where the Bribri models make the most errors, whereas the simpler tonal system of Cabécar is better transcribed by the model. Our work contributes to developing better ASR technology, an important tool that could facilitate transcription, one of the major bottlenecks in language documentation efforts. Our work also assesses how existing pre-trained models and algorithms perform for genuine extremely low-resource languages.
%R 10.18653/v1/2024.naacl-long.471
%U https://aclanthology.org/2024.naacl-long.471
%U https://doi.org/10.18653/v1/2024.naacl-long.471
%P 8521-8535
Markdown (Informal)
[Multilingual Models for ASR in Chibchan Languages](https://aclanthology.org/2024.naacl-long.471) (Coto-Solano et al., NAACL 2024)
ACL
Rolando Coto-Solano, Tai Wan Kim, Alexander Jones, and Sharid Loáiciga. 2024. Multilingual Models for ASR in Chibchan Languages. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 8521–8535, Mexico City, Mexico. Association for Computational Linguistics.