@inproceedings{harris-etal-2024-modeling,
title = "Modeling Gender and Dialect Bias in Automatic Speech Recognition",
author = "Harris, Camille and
Mgbahurike, Chijioke and
Kumar, Neha and
Yang, Diyi",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.890",
pages = "15166--15184",
abstract = "Dialect and gender-based biases have become an area of concern in language-dependent AI systemsincluding around automatic speech recognition (ASR) which processes speech audio into text. These potential biases raise concern for discriminatory outcomes with AI systems depending on demographic- particularly gender discrimination against women, and racial discrimination against minorities with ethnic or cultural English dialects.As such we aim to evaluate the performance of ASR systems across different genders and across dialects of English. Concretely, we take a deep dive of the performance of ASR systems on men and women across four US-based English dialects: Standard American English (SAE), African American Vernacular English (AAVE), Chicano English, and Spanglish. To do this, we construct a labeled dataset of 13 hours of podcast audio, transcribed by speakers of the represented dialects. We then evaluate zero-shot performance of different automatic speech recognition models on our dataset, and further finetune models to better understand how finetuning can impact performance. Our work fills the gap of investigating possible gender disparities within underrepresented dialects.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="harris-etal-2024-modeling">
<titleInfo>
<title>Modeling Gender and Dialect Bias in Automatic Speech Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Camille</namePart>
<namePart type="family">Harris</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chijioke</namePart>
<namePart type="family">Mgbahurike</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Neha</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diyi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Dialect and gender-based biases have become an area of concern in language-dependent AI systemsincluding around automatic speech recognition (ASR) which processes speech audio into text. These potential biases raise concern for discriminatory outcomes with AI systems depending on demographic- particularly gender discrimination against women, and racial discrimination against minorities with ethnic or cultural English dialects.As such we aim to evaluate the performance of ASR systems across different genders and across dialects of English. Concretely, we take a deep dive of the performance of ASR systems on men and women across four US-based English dialects: Standard American English (SAE), African American Vernacular English (AAVE), Chicano English, and Spanglish. To do this, we construct a labeled dataset of 13 hours of podcast audio, transcribed by speakers of the represented dialects. We then evaluate zero-shot performance of different automatic speech recognition models on our dataset, and further finetune models to better understand how finetuning can impact performance. Our work fills the gap of investigating possible gender disparities within underrepresented dialects.</abstract>
<identifier type="citekey">harris-etal-2024-modeling</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.890</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>15166</start>
<end>15184</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Modeling Gender and Dialect Bias in Automatic Speech Recognition
%A Harris, Camille
%A Mgbahurike, Chijioke
%A Kumar, Neha
%A Yang, Diyi
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F harris-etal-2024-modeling
%X Dialect and gender-based biases have become an area of concern in language-dependent AI systemsincluding around automatic speech recognition (ASR) which processes speech audio into text. These potential biases raise concern for discriminatory outcomes with AI systems depending on demographic- particularly gender discrimination against women, and racial discrimination against minorities with ethnic or cultural English dialects.As such we aim to evaluate the performance of ASR systems across different genders and across dialects of English. Concretely, we take a deep dive of the performance of ASR systems on men and women across four US-based English dialects: Standard American English (SAE), African American Vernacular English (AAVE), Chicano English, and Spanglish. To do this, we construct a labeled dataset of 13 hours of podcast audio, transcribed by speakers of the represented dialects. We then evaluate zero-shot performance of different automatic speech recognition models on our dataset, and further finetune models to better understand how finetuning can impact performance. Our work fills the gap of investigating possible gender disparities within underrepresented dialects.
%U https://aclanthology.org/2024.findings-emnlp.890
%P 15166-15184
Markdown (Informal)
[Modeling Gender and Dialect Bias in Automatic Speech Recognition](https://aclanthology.org/2024.findings-emnlp.890) (Harris et al., Findings 2024)
ACL