@inproceedings{tatman-2017-gender,
title = "Gender and Dialect Bias in {Y}ou{T}ube{'}s Automatic Captions",
author = "Tatman, Rachael",
editor = "Hovy, Dirk and
Spruit, Shannon and
Mitchell, Margaret and
Bender, Emily M. and
Strube, Michael and
Wallach, Hanna",
booktitle = "Proceedings of the First {ACL} Workshop on Ethics in Natural Language Processing",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-1606",
doi = "10.18653/v1/W17-1606",
pages = "53--59",
abstract = "This project evaluates the accuracy of YouTube{'}s automatically-generated captions across two genders and five dialect groups. Speakers{'} dialect and gender was controlled for by using videos uploaded as part of the {``}accent tag challenge{''}, where speakers explicitly identify their language background. The results show robust differences in accuracy across both gender and dialect, with lower accuracy for 1) women and 2) speakers from Scotland. This finding builds on earlier research finding that speaker{'}s sociolinguistic identity may negatively impact their ability to use automatic speech recognition, and demonstrates the need for sociolinguistically-stratified validation of systems.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tatman-2017-gender">
<titleInfo>
<title>Gender and Dialect Bias in YouTube’s Automatic Captions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rachael</namePart>
<namePart type="family">Tatman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First ACL Workshop on Ethics in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shannon</namePart>
<namePart type="family">Spruit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Margaret</namePart>
<namePart type="family">Mitchell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="given">M.</namePart>
<namePart type="family">Bender</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Strube</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hanna</namePart>
<namePart type="family">Wallach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This project evaluates the accuracy of YouTube’s automatically-generated captions across two genders and five dialect groups. Speakers’ dialect and gender was controlled for by using videos uploaded as part of the “accent tag challenge”, where speakers explicitly identify their language background. The results show robust differences in accuracy across both gender and dialect, with lower accuracy for 1) women and 2) speakers from Scotland. This finding builds on earlier research finding that speaker’s sociolinguistic identity may negatively impact their ability to use automatic speech recognition, and demonstrates the need for sociolinguistically-stratified validation of systems.</abstract>
<identifier type="citekey">tatman-2017-gender</identifier>
<identifier type="doi">10.18653/v1/W17-1606</identifier>
<location>
<url>https://aclanthology.org/W17-1606</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>53</start>
<end>59</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Gender and Dialect Bias in YouTube’s Automatic Captions
%A Tatman, Rachael
%Y Hovy, Dirk
%Y Spruit, Shannon
%Y Mitchell, Margaret
%Y Bender, Emily M.
%Y Strube, Michael
%Y Wallach, Hanna
%S Proceedings of the First ACL Workshop on Ethics in Natural Language Processing
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F tatman-2017-gender
%X This project evaluates the accuracy of YouTube’s automatically-generated captions across two genders and five dialect groups. Speakers’ dialect and gender was controlled for by using videos uploaded as part of the “accent tag challenge”, where speakers explicitly identify their language background. The results show robust differences in accuracy across both gender and dialect, with lower accuracy for 1) women and 2) speakers from Scotland. This finding builds on earlier research finding that speaker’s sociolinguistic identity may negatively impact their ability to use automatic speech recognition, and demonstrates the need for sociolinguistically-stratified validation of systems.
%R 10.18653/v1/W17-1606
%U https://aclanthology.org/W17-1606
%U https://doi.org/10.18653/v1/W17-1606
%P 53-59
Markdown (Informal)
[Gender and Dialect Bias in YouTube’s Automatic Captions](https://aclanthology.org/W17-1606) (Tatman, EthNLP 2017)
ACL