@inproceedings{imai-etal-2025-evaluating,
title = "Evaluating Open-Source {ASR} Systems: Performance Across Diverse Audio Conditions and Error Correction Methods",
author = "Imai, Saki and
Chowdhury, Tahiya and
Stent, Amanda J.",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.336/",
pages = "5027--5039",
abstract = "Despite significant advances in automatic speech recognition (ASR) accuracy, challenges remain. Naturally occurring conversation often involves multiple overlapping speakers, of different ages, accents and genders, as well as noisy environments and suboptimal audio recording equipment, all of which reduce ASR accuracy. In this study, we evaluate the accuracy of state of the art open source ASR systems across diverse conversational speech datasets, examining the impact of audio and speaker characteristics on WER. We then explore the potential of ASR ensembling and post-ASR correction methods to improve transcription accuracy. Our findings emphasize the need for robust error correction techniques and of continuing to address demographic biases to enhance ASR performance and inclusivity."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="imai-etal-2025-evaluating">
<titleInfo>
<title>Evaluating Open-Source ASR Systems: Performance Across Diverse Audio Conditions and Error Correction Methods</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saki</namePart>
<namePart type="family">Imai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tahiya</namePart>
<namePart type="family">Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanda</namePart>
<namePart type="given">J</namePart>
<namePart type="family">Stent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite significant advances in automatic speech recognition (ASR) accuracy, challenges remain. Naturally occurring conversation often involves multiple overlapping speakers, of different ages, accents and genders, as well as noisy environments and suboptimal audio recording equipment, all of which reduce ASR accuracy. In this study, we evaluate the accuracy of state of the art open source ASR systems across diverse conversational speech datasets, examining the impact of audio and speaker characteristics on WER. We then explore the potential of ASR ensembling and post-ASR correction methods to improve transcription accuracy. Our findings emphasize the need for robust error correction techniques and of continuing to address demographic biases to enhance ASR performance and inclusivity.</abstract>
<identifier type="citekey">imai-etal-2025-evaluating</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.336/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>5027</start>
<end>5039</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Open-Source ASR Systems: Performance Across Diverse Audio Conditions and Error Correction Methods
%A Imai, Saki
%A Chowdhury, Tahiya
%A Stent, Amanda J.
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Eugenio, Barbara Di
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F imai-etal-2025-evaluating
%X Despite significant advances in automatic speech recognition (ASR) accuracy, challenges remain. Naturally occurring conversation often involves multiple overlapping speakers, of different ages, accents and genders, as well as noisy environments and suboptimal audio recording equipment, all of which reduce ASR accuracy. In this study, we evaluate the accuracy of state of the art open source ASR systems across diverse conversational speech datasets, examining the impact of audio and speaker characteristics on WER. We then explore the potential of ASR ensembling and post-ASR correction methods to improve transcription accuracy. Our findings emphasize the need for robust error correction techniques and of continuing to address demographic biases to enhance ASR performance and inclusivity.
%U https://aclanthology.org/2025.coling-main.336/
%P 5027-5039
Markdown (Informal)
[Evaluating Open-Source ASR Systems: Performance Across Diverse Audio Conditions and Error Correction Methods](https://aclanthology.org/2025.coling-main.336/) (Imai et al., COLING 2025)
ACL