% Workshop paper. Braced words in title/booktitle keep their capitalisation
% under sentence-casing styles; month uses the standard three-letter macro.
@InProceedings{tatman:2017:EthNLP,
  author    = {Tatman, Rachael},
  title     = {Gender and Dialect Bias in {YouTube's} Automatic Captions},
  booktitle = {Proceedings of the First {ACL} Workshop on Ethics in Natural Language Processing},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {53--59},
  abstract  = {This project evaluates the accuracy of YouTube's automatically-generated
	captions across two genders and five dialect groups. Speakers' dialect and
	gender was controlled for by using videos uploaded as part of the ``accent tag
	challenge'', where speakers explicitly identify their language background. The
	results show robust differences in accuracy across both gender and dialect,
	with lower accuracy for 1) women and 2) speakers from Scotland. This finding
	builds on earlier research finding that speaker's sociolinguistic identity may
	negatively impact their ability to use automatic speech recognition, and
	demonstrates the need for sociolinguistically-stratified validation of systems.},
  url       = {http://www.aclweb.org/anthology/W17-1606},
}

