@inproceedings{ashungafac-etal-2025-afrispeech,
title = "{A}fri{S}peech-{M}ulti{B}ench: A Verticalized Multidomain Multicountry Benchmark Suite for {A}frican Accented {E}nglish {ASR}",
author = "Ashungafac, Gabrial Zencha and
Sanni, Mardhiyah and
Awobade, Busayo and
Gichamba, Alex and
Olatunji, Tobi",
editor = "Inui, Kentaro and
Sakti, Sakriani and
Wang, Haofen and
Wong, Derek F. and
Bhattacharyya, Pushpak and
Banerjee, Biplab and
Ekbal, Asif and
Chakraborty, Tanmoy and
Singh, Dhirendra Pratap",
booktitle = "Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "The Asian Federation of Natural Language Processing and The Association for Computational Linguistics",
url = "https://aclanthology.org/2025.ijcnlp-long.190/",
pages = "3642--3653",
ISBN = "979-8-89176-298-5",
abstract = "Recent advances in speech{-}enabled AI, including Google{'}s NotebookLM and OpenAI{'}s speech-to-speech API, are driving widespread interest in voice interfaces across sectors such as finance, health, agritech, legal services, and call{-}centers in the global north and south. Despite this momentum, there exists no publicly available application-specific model evaluation that caters to Africa{'}s linguistic diversity. We present $\textbf{AfriSpeech‑MultiBench}$, the first domain{-}specific evaluation suite for over 100 African English accents across 10+ countries and seven application domains: Finance, Legal, Medical, General dialogue, Call Center, Named Entities, and Hallucination Robustness. We benchmark a diverse range of open, closed, unimodal ASR and multimodal LLM-based speech recognition systems using both spontaneous and non-spontaneous speech conversations drawn from various open African accented English speech datasets. Our empirical analysis reveals systematic variation: open{-}source ASR excels in spontaneous speech contexts but degrades on noisy, non{-}native dialogue; multimodal LLMs are more accent{-}robust yet struggle with domain{-}specific named entities; proprietary models deliver high accuracy on clean speech but vary significantly by country and domain. Smaller models fine{-}tuned on African English achieve competitive accuracy with lower latency, a practical advantage for deployment. By releasing this benchmark, we empower practitioners and researchers to select voice technologies suited to African use{-}cases, fostering inclusive voice applications for undeserved communities."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ashungafac-etal-2025-afrispeech">
<titleInfo>
<title>AfriSpeech-MultiBench: A Verticalized Multidomain Multicountry Benchmark Suite for African Accented English ASR</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gabrial</namePart>
<namePart type="given">Zencha</namePart>
<namePart type="family">Ashungafac</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mardhiyah</namePart>
<namePart type="family">Sanni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Busayo</namePart>
<namePart type="family">Awobade</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Gichamba</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tobi</namePart>
<namePart type="family">Olatunji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haofen</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Derek</namePart>
<namePart type="given">F</namePart>
<namePart type="family">Wong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Biplab</namePart>
<namePart type="family">Banerjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asif</namePart>
<namePart type="family">Ekbal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dhirendra</namePart>
<namePart type="given">Pratap</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The Asian Federation of Natural Language Processing and The Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mumbai, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-298-5</identifier>
</relatedItem>
<abstract>Recent advances in speech-enabled AI, including Google’s NotebookLM and OpenAI’s speech-to-speech API, are driving widespread interest in voice interfaces across sectors such as finance, health, agritech, legal services, and call-centers in the global north and south. Despite this momentum, there exists no publicly available application-specific model evaluation that caters to Africa’s linguistic diversity. We present AfriSpeech‑MultiBench, the first domain-specific evaluation suite for over 100 African English accents across 10+ countries and seven application domains: Finance, Legal, Medical, General dialogue, Call Center, Named Entities, and Hallucination Robustness. We benchmark a diverse range of open, closed, unimodal ASR and multimodal LLM-based speech recognition systems using both spontaneous and non-spontaneous speech conversations drawn from various open African accented English speech datasets. Our empirical analysis reveals systematic variation: open-source ASR excels in spontaneous speech contexts but degrades on noisy, non-native dialogue; multimodal LLMs are more accent-robust yet struggle with domain-specific named entities; proprietary models deliver high accuracy on clean speech but vary significantly by country and domain. Smaller models fine-tuned on African English achieve competitive accuracy with lower latency, a practical advantage for deployment. By releasing this benchmark, we empower practitioners and researchers to select voice technologies suited to African use-cases, fostering inclusive voice applications for undeserved communities.</abstract>
<identifier type="citekey">ashungafac-etal-2025-afrispeech</identifier>
<location>
<url>https://aclanthology.org/2025.ijcnlp-long.190/</url>
</location>
<part>
<date>2025-12</date>
<extent unit="page">
<start>3642</start>
<end>3653</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AfriSpeech-MultiBench: A Verticalized Multidomain Multicountry Benchmark Suite for African Accented English ASR
%A Ashungafac, Gabrial Zencha
%A Sanni, Mardhiyah
%A Awobade, Busayo
%A Gichamba, Alex
%A Olatunji, Tobi
%Y Inui, Kentaro
%Y Sakti, Sakriani
%Y Wang, Haofen
%Y Wong, Derek F.
%Y Bhattacharyya, Pushpak
%Y Banerjee, Biplab
%Y Ekbal, Asif
%Y Chakraborty, Tanmoy
%Y Singh, Dhirendra Pratap
%S Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics
%D 2025
%8 December
%I The Asian Federation of Natural Language Processing and The Association for Computational Linguistics
%C Mumbai, India
%@ 979-8-89176-298-5
%F ashungafac-etal-2025-afrispeech
%X Recent advances in speech-enabled AI, including Google’s NotebookLM and OpenAI’s speech-to-speech API, are driving widespread interest in voice interfaces across sectors such as finance, health, agritech, legal services, and call-centers in the global north and south. Despite this momentum, there exists no publicly available application-specific model evaluation that caters to Africa’s linguistic diversity. We present AfriSpeech‑MultiBench, the first domain-specific evaluation suite for over 100 African English accents across 10+ countries and seven application domains: Finance, Legal, Medical, General dialogue, Call Center, Named Entities, and Hallucination Robustness. We benchmark a diverse range of open, closed, unimodal ASR and multimodal LLM-based speech recognition systems using both spontaneous and non-spontaneous speech conversations drawn from various open African accented English speech datasets. Our empirical analysis reveals systematic variation: open-source ASR excels in spontaneous speech contexts but degrades on noisy, non-native dialogue; multimodal LLMs are more accent-robust yet struggle with domain-specific named entities; proprietary models deliver high accuracy on clean speech but vary significantly by country and domain. Smaller models fine-tuned on African English achieve competitive accuracy with lower latency, a practical advantage for deployment. By releasing this benchmark, we empower practitioners and researchers to select voice technologies suited to African use-cases, fostering inclusive voice applications for undeserved communities.
%U https://aclanthology.org/2025.ijcnlp-long.190/
%P 3642-3653
Markdown (Informal)
[AfriSpeech-MultiBench: A Verticalized Multidomain Multicountry Benchmark Suite for African Accented English ASR](https://aclanthology.org/2025.ijcnlp-long.190/) (Ashungafac et al., IJCNLP-AACL 2025)
ACL