% ACL Anthology record: YodiV3 paper, AfricaNLP 2025 workshop proceedings.
% Proper names and acronyms in the title are brace-protected as whole words
% so that sentence-casing bibliography styles do not lowercase them.
@inproceedings{justin-etal-2025-yodiv3,
  title     = {{YodiV3}: {NLP} for {Togolese} Languages with {Eyaa-Tom} Dataset and the {Lom} Metric},
  author    = {Justin, Bakoubolo Essowe and
               Xegbe, Kodjo Fran{\c{c}}ois and
               Essuman, Catherine Nana Nyaah and
               Samuel, Afola Kossi Mawou{\'e}na},
  editor    = {Lignos, Constantine and
               Abdulmumin, Idris and
               Adelani, David},
  booktitle = {Proceedings of the Sixth Workshop on African Natural Language Processing ({AfricaNLP} 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.africanlp-1.20/},
  doi       = {10.18653/v1/2025.africanlp-1.20},
  pages     = {143--149},
  isbn      = {979-8-89176-257-2},
  abstract  = {Most of the 40+ languages spoken in Togo are severely under-represented in Natural Language Processing (NLP) resources. We present YodiV3, a comprehensive approach to developing NLP for ten Togolese languages (plus two major lingua francas) covering machine translation, speech recognition, text-to-speech, and language identification. We introduce Eyaa-Tom, a new multi-domain parallel corpus (religious, healthcare, financial, etc.) for these languages. We also propose the Lom metric, a scoring framework to quantify the AI-readiness of each language in terms of available resources. Our experiments demonstrate that leveraging large pretrained models (e.g., NLLB for translation, MMS for speech) with YodiV3 leads to significant improvements in low-resource translation and speech tasks. This work highlights the impact of integrating diverse data sources and pretrained models to bootstrap NLP for under-served languages, and outlines future steps for expanding coverage and capability.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="justin-etal-2025-yodiv3">
<titleInfo>
<title>YodiV3: NLP for Togolese Languages with Eyaa-Tom Dataset and the Lom Metric</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bakoubolo</namePart>
<namePart type="given">Essowe</namePart>
<namePart type="family">Justin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kodjo</namePart>
<namePart type="given">François</namePart>
<namePart type="family">Xegbe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Catherine</namePart>
<namePart type="given">Nana</namePart>
<namePart type="given">Nyaah</namePart>
<namePart type="family">Essuman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afola</namePart>
<namePart type="given">Kossi</namePart>
<namePart type="given">Mawouéna</namePart>
<namePart type="family">Samuel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Constantine</namePart>
<namePart type="family">Lignos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Idris</namePart>
<namePart type="family">Abdulmumin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Adelani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-257-2</identifier>
</relatedItem>
<abstract>Most of the 40+ languages spoken in Togo are severely under-represented in Natural Language Processing (NLP) resources. We present YodiV3, a comprehensive approach to developing NLP for ten Togolese languages (plus two major lingua francas) covering machine translation, speech recognition, text-to-speech, and language identification. We introduce Eyaa-Tom, a new multi-domain parallel corpus (religious, healthcare, financial, etc.) for these languages. We also propose the Lom metric, a scoring framework to quantify the AI-readiness of each language in terms of available resources. Our experiments demonstrate that leveraging large pretrained models (e.g., NLLB for translation, MMS for speech) with YodiV3 leads to significant improvements in low-resource translation and speech tasks. This work highlights the impact of integrating diverse data sources and pretrained models to bootstrap NLP for under-served languages, and outlines future steps for expanding coverage and capability.</abstract>
<identifier type="citekey">justin-etal-2025-yodiv3</identifier>
<identifier type="doi">10.18653/v1/2025.africanlp-1.20</identifier>
<location>
<url>https://aclanthology.org/2025.africanlp-1.20/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>143</start>
<end>149</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T YodiV3: NLP for Togolese Languages with Eyaa-Tom Dataset and the Lom Metric
%A Justin, Bakoubolo Essowe
%A Xegbe, Kodjo François
%A Essuman, Catherine Nana Nyaah
%A Samuel, Afola Kossi Mawouéna
%Y Lignos, Constantine
%Y Abdulmumin, Idris
%Y Adelani, David
%S Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-257-2
%F justin-etal-2025-yodiv3
%X Most of the 40+ languages spoken in Togo are severely under-represented in Natural Language Processing (NLP) resources. We present YodiV3, a comprehensive approach to developing NLP for ten Togolese languages (plus two major lingua francas) covering machine translation, speech recognition, text-to-speech, and language identification. We introduce Eyaa-Tom, a new multi-domain parallel corpus (religious, healthcare, financial, etc.) for these languages. We also propose the Lom metric, a scoring framework to quantify the AI-readiness of each language in terms of available resources. Our experiments demonstrate that leveraging large pretrained models (e.g., NLLB for translation, MMS for speech) with YodiV3 leads to significant improvements in low-resource translation and speech tasks. This work highlights the impact of integrating diverse data sources and pretrained models to bootstrap NLP for under-served languages, and outlines future steps for expanding coverage and capability.
%R 10.18653/v1/2025.africanlp-1.20
%U https://aclanthology.org/2025.africanlp-1.20/
%U https://doi.org/10.18653/v1/2025.africanlp-1.20
%P 143-149
Markdown (Informal)
[YodiV3: NLP for Togolese Languages with Eyaa-Tom Dataset and the Lom Metric](https://aclanthology.org/2025.africanlp-1.20/) (Justin et al., AfricaNLP 2025)
ACL