@inproceedings{xue-etal-2020-robust,
title = "Robust Neural Machine Translation with {ASR} Errors",
author = "Xue, Haiyang and
Feng, Yang and
Gu, Shuhao and
Chen, Wei",
editor = "Wu, Hua and
Cherry, Colin and
Huang, Liang and
He, Zhongjun and
Liberman, Mark and
Cross, James and
Liu, Yang",
booktitle = "Proceedings of the First Workshop on Automatic Simultaneous Translation",
month = jul,
year = "2020",
address = "Seattle, Washington",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.autosimtrans-1.3",
doi = "10.18653/v1/2020.autosimtrans-1.3",
pages = "15--23",
abstract = "In many practical applications, neural machine translation systems have to deal with the input from automatic speech recognition (ASR) systems which may contain a certain number of errors. This leads to two problems which degrade translation performance. One is the discrepancy between the training and testing data and the other is the translation error caused by the input errors may ruin the whole translation. In this paper, we propose a method to handle the two problems so as to generate robust translation to ASR errors. First, we simulate ASR errors in the training data so that the data distribution in the training and test is consistent. Second, we focus on ASR errors on homophone words and words with similar pronunciation and make use of their pronunciation information to help the translation model to recover from the input errors. Experiments on two Chinese-English data sets show that our method is more robust to input errors and can outperform the strong Transformer baseline significantly.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xue-etal-2020-robust">
<titleInfo>
<title>Robust Neural Machine Translation with ASR Errors</title>
</titleInfo>
<name type="personal">
<namePart type="given">Haiyang</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Feng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuhao</namePart>
<namePart type="family">Gu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Automatic Simultaneous Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hua</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Colin</namePart>
<namePart type="family">Cherry</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhongjun</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Liberman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Cross</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, Washington</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In many practical applications, neural machine translation systems have to deal with the input from automatic speech recognition (ASR) systems which may contain a certain number of errors. This leads to two problems which degrade translation performance. One is the discrepancy between the training and testing data and the other is the translation error caused by the input errors may ruin the whole translation. In this paper, we propose a method to handle the two problems so as to generate robust translation to ASR errors. First, we simulate ASR errors in the training data so that the data distribution in the training and test is consistent. Second, we focus on ASR errors on homophone words and words with similar pronunciation and make use of their pronunciation information to help the translation model to recover from the input errors. Experiments on two Chinese-English data sets show that our method is more robust to input errors and can outperform the strong Transformer baseline significantly.</abstract>
<identifier type="citekey">xue-etal-2020-robust</identifier>
<identifier type="doi">10.18653/v1/2020.autosimtrans-1.3</identifier>
<location>
<url>https://aclanthology.org/2020.autosimtrans-1.3</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>15</start>
<end>23</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Robust Neural Machine Translation with ASR Errors
%A Xue, Haiyang
%A Feng, Yang
%A Gu, Shuhao
%A Chen, Wei
%Y Wu, Hua
%Y Cherry, Colin
%Y Huang, Liang
%Y He, Zhongjun
%Y Liberman, Mark
%Y Cross, James
%Y Liu, Yang
%S Proceedings of the First Workshop on Automatic Simultaneous Translation
%D 2020
%8 July
%I Association for Computational Linguistics
%C Seattle, Washington
%F xue-etal-2020-robust
%X In many practical applications, neural machine translation systems have to deal with the input from automatic speech recognition (ASR) systems which may contain a certain number of errors. This leads to two problems which degrade translation performance. One is the discrepancy between the training and testing data and the other is the translation error caused by the input errors may ruin the whole translation. In this paper, we propose a method to handle the two problems so as to generate robust translation to ASR errors. First, we simulate ASR errors in the training data so that the data distribution in the training and test is consistent. Second, we focus on ASR errors on homophone words and words with similar pronunciation and make use of their pronunciation information to help the translation model to recover from the input errors. Experiments on two Chinese-English data sets show that our method is more robust to input errors and can outperform the strong Transformer baseline significantly.
%R 10.18653/v1/2020.autosimtrans-1.3
%U https://aclanthology.org/2020.autosimtrans-1.3
%U https://doi.org/10.18653/v1/2020.autosimtrans-1.3
%P 15-23
Markdown (Informal)
[Robust Neural Machine Translation with ASR Errors](https://aclanthology.org/2020.autosimtrans-1.3) (Xue et al., AutoSimTrans 2020)
ACL
- Haiyang Xue, Yang Feng, Shuhao Gu, and Wei Chen. 2020. Robust Neural Machine Translation with ASR Errors. In Proceedings of the First Workshop on Automatic Simultaneous Translation, pages 15–23, Seattle, Washington. Association for Computational Linguistics.