@inproceedings{mirzaei-etal-2016-automatic,
  title     = {Automatic Speech Recognition Errors as a Predictor of {L2} Listening Difficulties},
  author    = {Mirzaei, Maryam Sadat and
               Meshgi, Kourosh and
               Kawahara, Tatsuya},
  editor    = {Brunato, Dominique and
               Dell{'}Orletta, Felice and
               Venturi, Giulia and
               Fran{\c{c}}ois, Thomas and
               Blache, Philippe},
  booktitle = {Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity ({CL4LC})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  url       = {https://aclanthology.org/W16-4122},
  pages     = {192--201},
  abstract  = {This paper investigates the use of automatic speech recognition (ASR) errors as indicators of the second language (L2) learners{'} listening difficulties and in doing so strives to overcome the shortcomings of Partial and Synchronized Caption (PSC) system. PSC is a system that generates a partial caption including difficult words detected based on high speech rate, low frequency, and specificity. To improve the choice of words in this system, and explore a better method to detect speech challenges, ASR errors were investigated as a model of the L2 listener, hypothesizing that some of these errors are similar to those of language learners{'} when transcribing the videos. To investigate this hypothesis, ASR errors in transcription of several TED talks were analyzed and compared with PSC{'}s selected words. Both the overlapping and mismatching cases were analyzed to investigate possible improvement for the PSC system. Those ASR errors that were not detected by PSC as cases of learners{'} difficulties were further analyzed and classified into four categories: homophones, minimal pairs, breached boundaries and negatives. These errors were embedded into the baseline PSC to make the enhanced version and were evaluated in an experiment with L2 learners. The results indicated that the enhanced version, which encompasses the ASR errors addresses most of the L2 learners{'} difficulties and better assists them in comprehending challenging video segments as compared with the baseline.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mirzaei-etal-2016-automatic">
<titleInfo>
<title>Automatic Speech Recognition Errors as a Predictor of L2 Listening Difficulties</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maryam</namePart>
<namePart type="given">Sadat</namePart>
<namePart type="family">Mirzaei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kourosh</namePart>
<namePart type="family">Meshgi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tatsuya</namePart>
<namePart type="family">Kawahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dominique</namePart>
<namePart type="family">Brunato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felice</namePart>
<namePart type="family">Dell’Orletta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giulia</namePart>
<namePart type="family">Venturi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">François</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper investigates the use of automatic speech recognition (ASR) errors as indicators of the second language (L2) learners’ listening difficulties and in doing so strives to overcome the shortcomings of Partial and Synchronized Caption (PSC) system. PSC is a system that generates a partial caption including difficult words detected based on high speech rate, low frequency, and specificity. To improve the choice of words in this system, and explore a better method to detect speech challenges, ASR errors were investigated as a model of the L2 listener, hypothesizing that some of these errors are similar to those of language learners’ when transcribing the videos. To investigate this hypothesis, ASR errors in transcription of several TED talks were analyzed and compared with PSC’s selected words. Both the overlapping and mismatching cases were analyzed to investigate possible improvement for the PSC system. Those ASR errors that were not detected by PSC as cases of learners’ difficulties were further analyzed and classified into four categories: homophones, minimal pairs, breached boundaries and negatives. These errors were embedded into the baseline PSC to make the enhanced version and were evaluated in an experiment with L2 learners. The results indicated that the enhanced version, which encompasses the ASR errors addresses most of the L2 learners’ difficulties and better assists them in comprehending challenging video segments as compared with the baseline.</abstract>
<identifier type="citekey">mirzaei-etal-2016-automatic</identifier>
<location>
<url>https://aclanthology.org/W16-4122</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>192</start>
<end>201</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Speech Recognition Errors as a Predictor of L2 Listening Difficulties
%A Mirzaei, Maryam Sadat
%A Meshgi, Kourosh
%A Kawahara, Tatsuya
%Y Brunato, Dominique
%Y Dell’Orletta, Felice
%Y Venturi, Giulia
%Y François, Thomas
%Y Blache, Philippe
%S Proceedings of the Workshop on Computational Linguistics for Linguistic Complexity (CL4LC)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F mirzaei-etal-2016-automatic
%X This paper investigates the use of automatic speech recognition (ASR) errors as indicators of the second language (L2) learners’ listening difficulties and in doing so strives to overcome the shortcomings of Partial and Synchronized Caption (PSC) system. PSC is a system that generates a partial caption including difficult words detected based on high speech rate, low frequency, and specificity. To improve the choice of words in this system, and explore a better method to detect speech challenges, ASR errors were investigated as a model of the L2 listener, hypothesizing that some of these errors are similar to those of language learners’ when transcribing the videos. To investigate this hypothesis, ASR errors in transcription of several TED talks were analyzed and compared with PSC’s selected words. Both the overlapping and mismatching cases were analyzed to investigate possible improvement for the PSC system. Those ASR errors that were not detected by PSC as cases of learners’ difficulties were further analyzed and classified into four categories: homophones, minimal pairs, breached boundaries and negatives. These errors were embedded into the baseline PSC to make the enhanced version and were evaluated in an experiment with L2 learners. The results indicated that the enhanced version, which encompasses the ASR errors addresses most of the L2 learners’ difficulties and better assists them in comprehending challenging video segments as compared with the baseline.
%U https://aclanthology.org/W16-4122
%P 192-201
Markdown (Informal)
[Automatic Speech Recognition Errors as a Predictor of L2 Listening Difficulties](https://aclanthology.org/W16-4122) (Mirzaei et al., CL4LC 2016)
ACL