@inproceedings{blocker-etal-2025-benchmarking,
title = "Benchmarking {I}si{X}hosa Automatic Speech Recognition and Machine Translation for Digital Health Provision",
author = "Blocker, Abby and
Meyer, Francois and
Biyabani, Ahmed and
Mwangama, Joyce and
Datay, Mohammed Ishaaq and
Malila, Bessie",
editor = "Ananiadou, Sophia and
Demner-Fushman, Dina and
Gupta, Deepak and
Thompson, Paul",
booktitle = "Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health)",
month = may,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.cl4health-1.14/",
doi = "10.18653/v1/2025.cl4health-1.14",
pages = "169--179",
ISBN = "979-8-89176-238-1",
abstract = "As digital health becomes more ubiquitous, people from different geographic regions are connected and there is thus a need for accurate language translation services. South Africa presents opportunity and need for digital health innovation, but implementing indigenous translation systems for digital health is difficult due to a lack of language resources. Understanding the accuracy of current models for use in medical translation of indigenous languages is crucial for designers looking to build quality digital health solutions. This paper presents a new dataset with audio and text of primary health consultations for automatic speech recognition and machine translation in South African English and the indigenous South African language of isiXhosa. We then evaluate the performance of well-established pretrained models on this dataset. We found that isiXhosa had limited support in speech recognition models and showed high, variable character error rates for transcription (26-70{\%}). For translation tasks, Google Cloud Translate and ChatGPT outperformed the other evaluated models, indicating large language models can have similar performance to dedicated machine translation models for low-resource language translation."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="blocker-etal-2025-benchmarking">
<titleInfo>
<title>Benchmarking IsiXhosa Automatic Speech Recognition and Machine Translation for Digital Health Provision</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abby</namePart>
<namePart type="family">Blocker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francois</namePart>
<namePart type="family">Meyer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Biyabani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Mwangama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="given">Ishaaq</namePart>
<namePart type="family">Datay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bessie</namePart>
<namePart type="family">Malila</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deepak</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Thompson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-238-1</identifier>
</relatedItem>
<abstract>As digital health becomes more ubiquitous, people from different geographic regions are connected and there is thus a need for accurate language translation services. South Africa presents opportunity and need for digital health innovation, but implementing indigenous translation systems for digital health is difficult due to a lack of language resources. Understanding the accuracy of current models for use in medical translation of indigenous languages is crucial for designers looking to build quality digital health solutions. This paper presents a new dataset with audio and text of primary health consultations for automatic speech recognition and machine translation in South African English and the indigenous South African language of isiXhosa. We then evaluate the performance of well-established pretrained models on this dataset. We found that isiXhosa had limited support in speech recognition models and showed high, variable character error rates for transcription (26-70%). For translation tasks, Google Cloud Translate and ChatGPT outperformed the other evaluated models, indicating large language models can have similar performance to dedicated machine translation models for low-resource language translation.</abstract>
<identifier type="citekey">blocker-etal-2025-benchmarking</identifier>
<identifier type="doi">10.18653/v1/2025.cl4health-1.14</identifier>
<location>
<url>https://aclanthology.org/2025.cl4health-1.14/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>169</start>
<end>179</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Benchmarking IsiXhosa Automatic Speech Recognition and Machine Translation for Digital Health Provision
%A Blocker, Abby
%A Meyer, Francois
%A Biyabani, Ahmed
%A Mwangama, Joyce
%A Datay, Mohammed Ishaaq
%A Malila, Bessie
%Y Ananiadou, Sophia
%Y Demner-Fushman, Dina
%Y Gupta, Deepak
%Y Thompson, Paul
%S Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health)
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-238-1
%F blocker-etal-2025-benchmarking
%X As digital health becomes more ubiquitous, people from different geographic regions are connected and there is thus a need for accurate language translation services. South Africa presents opportunity and need for digital health innovation, but implementing indigenous translation systems for digital health is difficult due to a lack of language resources. Understanding the accuracy of current models for use in medical translation of indigenous languages is crucial for designers looking to build quality digital health solutions. This paper presents a new dataset with audio and text of primary health consultations for automatic speech recognition and machine translation in South African English and the indigenous South African language of isiXhosa. We then evaluate the performance of well-established pretrained models on this dataset. We found that isiXhosa had limited support in speech recognition models and showed high, variable character error rates for transcription (26-70%). For translation tasks, Google Cloud Translate and ChatGPT outperformed the other evaluated models, indicating large language models can have similar performance to dedicated machine translation models for low-resource language translation.
%R 10.18653/v1/2025.cl4health-1.14
%U https://aclanthology.org/2025.cl4health-1.14/
%U https://doi.org/10.18653/v1/2025.cl4health-1.14
%P 169-179
Markdown (Informal)
[Benchmarking IsiXhosa Automatic Speech Recognition and Machine Translation for Digital Health Provision](https://aclanthology.org/2025.cl4health-1.14/) (Blocker et al., CL4Health 2025)
ACL