% Conference-paper record. Field values are brace-delimited, and the proper
% adjective in the title is protected as a whole word so that sentence-casing
% styles keep its capital letter.
@inproceedings{ngue-um-etal-2025-speech,
  title     = {Speech Technologies Datasets for {African} Under-Served Languages},
  author    = {Ngue Um, Emmanuel and
               Tyers, Francis and
               Ngo Tjomb, Eliette-Caroline Emilie and
               Dibengue, Florus Landry and
               Banoum Manguele, Blaise-Mathieu and
               Djoulde, Blaise Abbo and
               Nyambe A, Mathilde and
               Atangana Eloundou, Brice Martial and
               Ngami Kamagoua, Jeff Sterling and
               Mpouda Avom, Jos{\'e} and
               Nyobe, Zacharie and
               Eloundou Eyenga, Emmanuel Giovanni and
               Likwai, Andr{\'e}},
  editor    = {Lachler, Jordan and
               Agyapong, Godfred and
               Arppe, Antti and
               Moeller, Sarah and
               Chaudhary, Aditi and
               Rijhwani, Shruti and
               Rosenblum, Daisy},
  booktitle = {Proceedings of the Eight Workshop on the Use of Computational Methods in the Study of Endangered Languages},
  month     = mar,
  year      = {2025},
  address   = {Honolulu, Hawaii, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.computel-main.9/},
  pages     = {82--90},
  abstract  = {The expansion of the speech technology sector has given rise to a novel economic model in language research, with the objective of developing speech datasets. This model is expanding to under-served African languages through collaborative efforts between industries, organisations, and the active participation of communities. This collaboration is yielding new datasets for machine learning, while also disclosing vulnerabilities and sociolinguistic discrepancies between industrialised and non-industrialised societies. A case study of a speech data collection camp that took place in September 2024 in Cameroon, involving representatives of 31 languages throughout the continent, illustrates both the prospects of the new economic model for research on under-served languages and the challenges of fair, effective, and responsible participation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="ngue-um-etal-2025-speech">
    <titleInfo>
      <title>Speech Technologies Datasets for African Under-Served Languages</title>
    </titleInfo>

    <!-- Authors: one personal <name> per author, each with the "author" role. -->
    <name type="personal">
      <namePart type="given">Emmanuel</namePart>
      <namePart type="family">Ngue Um</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Francis</namePart>
      <namePart type="family">Tyers</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eliette-Caroline</namePart>
      <namePart type="given">Emilie</namePart>
      <namePart type="family">Ngo Tjomb</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Florus</namePart>
      <namePart type="given">Landry</namePart>
      <namePart type="family">Dibengue</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Blaise-Mathieu</namePart>
      <namePart type="family">Banoum Manguele</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Blaise</namePart>
      <namePart type="given">Abbo</namePart>
      <namePart type="family">Djoulde</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mathilde</namePart>
      <namePart type="family">Nyambe A</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Brice</namePart>
      <namePart type="given">Martial</namePart>
      <namePart type="family">Atangana Eloundou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jeff</namePart>
      <namePart type="given">Sterling</namePart>
      <namePart type="family">Ngami Kamagoua</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">José</namePart>
      <namePart type="family">Mpouda Avom</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zacharie</namePart>
      <namePart type="family">Nyobe</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Emmanuel</namePart>
      <namePart type="given">Giovanni</namePart>
      <namePart type="family">Eloundou Eyenga</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">André</namePart>
      <namePart type="family">Likwai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2025-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Eight Workshop on the Use of Computational Methods in the Study of Endangered Languages</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Jordan</namePart>
        <namePart type="family">Lachler</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Godfred</namePart>
        <namePart type="family">Agyapong</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Antti</namePart>
        <namePart type="family">Arppe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sarah</namePart>
        <namePart type="family">Moeller</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aditi</namePart>
        <namePart type="family">Chaudhary</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shruti</namePart>
        <namePart type="family">Rijhwani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daisy</namePart>
        <namePart type="family">Rosenblum</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Honolulu, Hawaii, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>The expansion of the speech technology sector has given rise to a novel economic model in language research, with the objective of developing speech datasets. This model is expanding to under-served African languages through collaborative efforts between industries, organisations, and the active participation of communities. This collaboration is yielding new datasets for machine learning, while also disclosing vulnerabilities and sociolinguistic discrepancies between industrialised and non-industrialised societies. A case study of a speech data collection camp that took place in September 2024 in Cameroon, involving representatives of 31 languages throughout the continent, illustrates both the prospects of the new economic model for research on under-served languages and the challenges of fair, effective, and responsible participation.</abstract>
    <identifier type="citekey">ngue-um-etal-2025-speech</identifier>
    <location>
      <url>https://aclanthology.org/2025.computel-main.9/</url>
    </location>

    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2025-03</date>
      <extent unit="page">
        <start>82</start>
        <end>90</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Speech Technologies Datasets for African Under-Served Languages
%A Ngue Um, Emmanuel
%A Tyers, Francis
%A Ngo Tjomb, Eliette-Caroline Emilie
%A Dibengue, Florus Landry
%A Banoum Manguele, Blaise-Mathieu
%A Djoulde, Blaise Abbo
%A Nyambe A, Mathilde
%A Atangana Eloundou, Brice Martial
%A Ngami Kamagoua, Jeff Sterling
%A Mpouda Avom, José
%A Nyobe, Zacharie
%A Eloundou Eyenga, Emmanuel Giovanni
%A Likwai, André
%Y Lachler, Jordan
%Y Agyapong, Godfred
%Y Arppe, Antti
%Y Moeller, Sarah
%Y Chaudhary, Aditi
%Y Rijhwani, Shruti
%Y Rosenblum, Daisy
%S Proceedings of the Eight Workshop on the Use of Computational Methods in the Study of Endangered Languages
%D 2025
%8 March
%I Association for Computational Linguistics
%C Honolulu, Hawaii, USA
%F ngue-um-etal-2025-speech
%X The expansion of the speech technology sector has given rise to a novel economic model in language research, with the objective of developing speech datasets. This model is expanding to under-served African languages through collaborative efforts between industries, organisations, and the active participation of communities. This collaboration is yielding new datasets for machine learning, while also disclosing vulnerabilities and sociolinguistic discrepancies between industrialised and non-industrialised societies. A case study of a speech data collection camp that took place in September 2024 in Cameroon, involving representatives of 31 languages throughout the continent, illustrates both the prospects of the new economic model for research on under-served languages and the challenges of fair, effective, and responsible participation.
%U https://aclanthology.org/2025.computel-main.9/
%P 82-90
Markdown (Informal)
[Speech Technologies Datasets for African Under-Served Languages](https://aclanthology.org/2025.computel-main.9/) (Ngue Um et al., ComputEL 2025)
ACL
- Emmanuel Ngue Um, Francis Tyers, Eliette-Caroline Emilie Ngo Tjomb, Florus Landry Dibengue, Blaise-Mathieu Banoum Manguele, Blaise Abbo Djoulde, Mathilde Nyambe A, Brice Martial Atangana Eloundou, Jeff Sterling Ngami Kamagoua, José Mpouda Avom, Zacharie Nyobe, Emmanuel Giovanni Eloundou Eyenga, and André Likwai. 2025. Speech Technologies Datasets for African Under-Served Languages. In Proceedings of the Eight Workshop on the Use of Computational Methods in the Study of Endangered Languages, pages 82–90, Honolulu, Hawaii, USA. Association for Computational Linguistics.