@inproceedings{munot-nenkova-2019-emotion,
title = "Emotion Impacts Speech Recognition Performance",
author = "Munot, Rushab and
Nenkova, Ani",
editor = "Kar, Sudipta and
Nadeem, Farah and
Burdick, Laura and
Durrett, Greg and
Han, Na-Rae",
booktitle = "Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Student Research Workshop",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/N19-3003/",
doi = "10.18653/v1/N19-3003",
pages = "16--21",
abstract = "It has been established that the performance of speech recognition systems depends on multiple factors including the lexical content, speaker identity and dialect. Here we use three English datasets of acted emotion to demonstrate that emotional content also impacts the performance of commercial systems. On two of the corpora, emotion is a bigger contributor to recognition errors than speaker identity and on two, neutral speech is recognized considerably better than emotional speech. We further evaluate the commercial systems on spontaneous interactions that contain portions of emotional speech. We propose and validate on the acted datasets, a method that allows us to evaluate the overall impact of emotion on recognition even when manual transcripts are not available. Using this method, we show that emotion in natural spontaneous dialogue is a less prominent but still significant factor in recognition accuracy."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="munot-nenkova-2019-emotion">
<titleInfo>
<title>Emotion Impacts Speech Recognition Performance</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rushab</namePart>
<namePart type="family">Munot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ani</namePart>
<namePart type="family">Nenkova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sudipta</namePart>
<namePart type="family">Kar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Farah</namePart>
<namePart type="family">Nadeem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Burdick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Durrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Na-Rae</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, Minnesota</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>It has been established that the performance of speech recognition systems depends on multiple factors including the lexical content, speaker identity and dialect. Here we use three English datasets of acted emotion to demonstrate that emotional content also impacts the performance of commercial systems. On two of the corpora, emotion is a bigger contributor to recognition errors than speaker identity and on two, neutral speech is recognized considerably better than emotional speech. We further evaluate the commercial systems on spontaneous interactions that contain portions of emotional speech. We propose and validate on the acted datasets, a method that allows us to evaluate the overall impact of emotion on recognition even when manual transcripts are not available. Using this method, we show that emotion in natural spontaneous dialogue is a less prominent but still significant factor in recognition accuracy.</abstract>
<identifier type="citekey">munot-nenkova-2019-emotion</identifier>
<identifier type="doi">10.18653/v1/N19-3003</identifier>
<location>
<url>https://aclanthology.org/N19-3003/</url>
</location>
<part>
<date>2019-06</date>
<extent unit="page">
<start>16</start>
<end>21</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Emotion Impacts Speech Recognition Performance
%A Munot, Rushab
%A Nenkova, Ani
%Y Kar, Sudipta
%Y Nadeem, Farah
%Y Burdick, Laura
%Y Durrett, Greg
%Y Han, Na-Rae
%S Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F munot-nenkova-2019-emotion
%X It has been established that the performance of speech recognition systems depends on multiple factors including the lexical content, speaker identity and dialect. Here we use three English datasets of acted emotion to demonstrate that emotional content also impacts the performance of commercial systems. On two of the corpora, emotion is a bigger contributor to recognition errors than speaker identity and on two, neutral speech is recognized considerably better than emotional speech. We further evaluate the commercial systems on spontaneous interactions that contain portions of emotional speech. We propose and validate on the acted datasets, a method that allows us to evaluate the overall impact of emotion on recognition even when manual transcripts are not available. Using this method, we show that emotion in natural spontaneous dialogue is a less prominent but still significant factor in recognition accuracy.
%R 10.18653/v1/N19-3003
%U https://aclanthology.org/N19-3003/
%U https://doi.org/10.18653/v1/N19-3003
%P 16-21
Markdown (Informal)
[Emotion Impacts Speech Recognition Performance](https://aclanthology.org/N19-3003/) (Munot & Nenkova, NAACL 2019)
ACL
- Rushab Munot and Ani Nenkova. 2019. Emotion Impacts Speech Recognition Performance. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop, pages 16–21, Minneapolis, Minnesota. Association for Computational Linguistics.