% Workshop paper record (AfricaNLP 2026, pages 264--270) for
% https://aclanthology.org/2026.africanlp-main.28/
% Braces in the title keep proper nouns and acronyms capitalised under
% bibliography styles that sentence-case titles.
% NOTE(review): "KOGBEDJI" is all-caps inside the given-name part of one author;
% confirm the intended family/given split against the paper.
@inproceedings{justin-etal-2026-eyaa,
  title = "{E}yaa-{T}om 26, {Y}odi-{M}antissa and {L}om {B}ench: A Community Benchmark for {TTS} in Local Languages",
  author = "Justin, Bakoubolo Essowe and
    Essuman, Catherine Nana Nyaah and
    Agbobli, Messan and
    Kansiwer, Ahoefa and
    Doumeyan, Eli Jean and
    Pato, Julie and
    Timibe, Notou Your and
    Agossou, Emile KOGBEDJI and
    Bakouya, Guedela",
  editor = "Chimoto, Everlyn Asiko and
    Lignos, Constantine and
    Muhammad, Shamsuddeen and
    Abdulmumin, Idris and
    Siro, Clemencia and
    Adelani, David Ifeoluwa",
  booktitle = "Proceedings of the 7th Workshop on {A}frican Natural Language Processing ({A}frica{NLP} 2026)",
  month = mar,
  year = "2026",
  address = "Rabat, Morocco",
  publisher = "Association for Computational Linguistics",
  url = "https://aclanthology.org/2026.africanlp-main.28/",
  pages = "264--270",
  isbn = "979-8-89176-364-7",
  abstract = "We present an extension of our previous work on multilingual NLP for Togolese languages by introducing new datasets, improved models, and a community-driven evaluation benchmark for Text-To-Speech (TTS). We expand the Eyaa-Tom multilingual corpus with additional speech data of about 26.9k recordings (30.9 hours) across 10 local languages, and incorporated 64.6k clips (46.6 hours) of Mozilla Common Voice contributions for Adja, Nawdm, Mina, and Tem to strengthen Automatic Speech Recognition (ASR) and speech synthesis. We detail how community contributors {--} including collaboration with a national TV journalist {--} helped collect and validate the Kaby{\`e} and French text, with an ethical compensation model in place. We fine-tune state-of-the-art models: OpenAI Whisper and faster-whisper, and Meta{'}s NLLB-200 model for machine translation across 11 languages (achieving 19.4 BLEU score for French{\textrightarrow}Ewe and 26.1 BLEU score for Kaby{\`e}{\textrightarrow}French). We also introduce the Lom Bench, a community-based benchmark where native speakers rate TTS output, indicating promising preliminary results in Mina and Togolese lingua franca french although further data is needed. We provide a comparative analysis of our results with recent multilingual systems, including Simba, Meta{'}s Omnilingual ASR, and UBC Toucan. Our work emphasizes practical pathways and how FAIR data sourcing and community participation can drive sustainable NLP development for underserved languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record "justin-etal-2026-eyaa": paper title, authors in listed order,
     host proceedings (editors, publisher, place, ISBN), abstract, URL and page range. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="justin-etal-2026-eyaa">
    <titleInfo>
      <title>Eyaa-Tom 26, Yodi-Mantissa and Lom Bench: A Community Benchmark for TTS in Local Languages</title>
    </titleInfo>
    <!-- Authors -->
    <name type="personal">
      <namePart type="given">Bakoubolo</namePart>
      <namePart type="given">Essowe</namePart>
      <namePart type="family">Justin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Catherine</namePart>
      <namePart type="given">Nana</namePart>
      <namePart type="given">Nyaah</namePart>
      <namePart type="family">Essuman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Messan</namePart>
      <namePart type="family">Agbobli</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ahoefa</namePart>
      <namePart type="family">Kansiwer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eli</namePart>
      <namePart type="given">Jean</namePart>
      <namePart type="family">Doumeyan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Julie</namePart>
      <namePart type="family">Pato</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Notou</namePart>
      <namePart type="given">Your</namePart>
      <namePart type="family">Timibe</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Emile</namePart>
      <namePart type="given">KOGBEDJI</namePart>
      <namePart type="family">Agossou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Guedela</namePart>
      <namePart type="family">Bakouya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume and its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 7th Workshop on African Natural Language Processing (AfricaNLP 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Everlyn</namePart>
        <namePart type="given">Asiko</namePart>
        <namePart type="family">Chimoto</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Constantine</namePart>
        <namePart type="family">Lignos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shamsuddeen</namePart>
        <namePart type="family">Muhammad</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Idris</namePart>
        <namePart type="family">Abdulmumin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Clemencia</namePart>
        <namePart type="family">Siro</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="given">Ifeoluwa</namePart>
        <namePart type="family">Adelani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-364-7</identifier>
    </relatedItem>
    <abstract>We present an extension of our previous work on multilingual NLP for Togolese languages by introducing new datasets, improved models, and a community-driven evaluation benchmark for Text-To-Speech (TTS). We expand the Eyaa-Tom multilingual corpus with additional speech data of about 26.9k recordings (30.9 hours) across 10 local languages, and incorporated 64.6k clips (46.6 hours) of Mozilla Common Voice contributions for Adja, Nawdm, Mina, and Tem to strengthen Automatic Speech Recognition (ASR) and speech synthesis. We detail how community contributors – including collaboration with a national TV journalist – helped collect and validate the Kabyè and French text, with an ethical compensation model in place. We fine-tune state-of-the-art models: OpenAI Whisper and faster-whisper, and Meta’s NLLB-200 model for machine translation across 11 languages (achieving 19.4 BLEU score for French→Ewe and 26.1 BLEU score for Kabyè→French). We also introduce the Lom Bench, a community-based benchmark where native speakers rate TTS output, indicating promising preliminary results in Mina and Togolese lingua franca french although further data is needed. We provide a comparative analysis of our results with recent multilingual systems, including Simba, Meta’s Omnilingual ASR, and UBC Toucan. Our work emphasizes practical pathways and how FAIR data sourcing and community participation can drive sustainable NLP development for underserved languages.</abstract>
    <identifier type="citekey">justin-etal-2026-eyaa</identifier>
    <location>
      <url>https://aclanthology.org/2026.africanlp-main.28/</url>
    </location>
    <!-- Position within the host volume -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>264</start>
        <end>270</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Eyaa-Tom 26, Yodi-Mantissa and Lom Bench: A Community Benchmark for TTS in Local Languages
%A Justin, Bakoubolo Essowe
%A Essuman, Catherine Nana Nyaah
%A Agbobli, Messan
%A Kansiwer, Ahoefa
%A Doumeyan, Eli Jean
%A Pato, Julie
%A Timibe, Notou Your
%A Agossou, Emile KOGBEDJI
%A Bakouya, Guedela
%Y Chimoto, Everlyn Asiko
%Y Lignos, Constantine
%Y Muhammad, Shamsuddeen
%Y Abdulmumin, Idris
%Y Siro, Clemencia
%Y Adelani, David Ifeoluwa
%S Proceedings of the 7th Workshop on African Natural Language Processing (AfricaNLP 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-364-7
%F justin-etal-2026-eyaa
%X We present an extension of our previous work on multilingual NLP for Togolese languages by introducing new datasets, improved models, and a community-driven evaluation benchmark for Text-To-Speech (TTS). We expand the Eyaa-Tom multilingual corpus with additional speech data of about 26.9k recordings (30.9 hours) across 10 local languages, and incorporated 64.6k clips (46.6 hours) of Mozilla Common Voice contributions for Adja, Nawdm, Mina, and Tem to strengthen Automatic Speech Recognition (ASR) and speech synthesis. We detail how community contributors – including collaboration with a national TV journalist – helped collect and validate the Kabyè and French text, with an ethical compensation model in place. We fine-tune state-of-the-art models: OpenAI Whisper and faster-whisper, and Meta’s NLLB-200 model for machine translation across 11 languages (achieving 19.4 BLEU score for French→Ewe and 26.1 BLEU score for Kabyè→French). We also introduce the Lom Bench, a community-based benchmark where native speakers rate TTS output, indicating promising preliminary results in Mina and Togolese lingua franca french although further data is needed. We provide a comparative analysis of our results with recent multilingual systems, including Simba, Meta’s Omnilingual ASR, and UBC Toucan. Our work emphasizes practical pathways and how FAIR data sourcing and community participation can drive sustainable NLP development for underserved languages.
%U https://aclanthology.org/2026.africanlp-main.28/
%P 264-270
Markdown (Informal)
[Eyaa-Tom 26, Yodi-Mantissa and Lom Bench: A Community Benchmark for TTS in Local Languages](https://aclanthology.org/2026.africanlp-main.28/) (Justin et al., AfricaNLP 2026)
ACL
- Bakoubolo Essowe Justin, Catherine Nana Nyaah Essuman, Messan Agbobli, Ahoefa Kansiwer, Eli Jean Doumeyan, Julie Pato, Notou Your Timibe, Emile KOGBEDJI Agossou, and Guedela Bakouya. 2026. Eyaa-Tom 26, Yodi-Mantissa and Lom Bench: A Community Benchmark for TTS in Local Languages. In Proceedings of the 7th Workshop on African Natural Language Processing (AfricaNLP 2026), pages 264–270, Rabat, Morocco. Association for Computational Linguistics.