@inproceedings{osamura-etal-2018-using,
title = "Using Spoken Word Posterior Features in Neural Machine Translation",
author = "Osamura, Kaho and
Kano, Takatomo and
Sakti, Sakriani and
Sudoh, Katsuhito and
Nakamura, Satoshi",
editor = "Turchi, Marco and
Niehues, Jan and
Frederico, Marcello",
booktitle = "Proceedings of the 15th International Conference on Spoken Language Translation",
month = oct # " 29-30",
year = "2018",
address = "Brussels",
publisher = "International Conference on Spoken Language Translation",
url = "https://aclanthology.org/2018.iwslt-1.28",
pages = "189--195",
abstract = "A spoken language translation (ST) system consists of at least two modules: an automatic speech recognition (ASR) system and a machine translation (MT) system. In most cases, an MT is only trained and optimized using error-free text data. If the ASR makes errors, the translation accuracy will be greatly reduced. Existing studies have shown that training MT systems with ASR parameters or word lattices can improve the translation quality. However, such an extension requires a large change in standard MT systems, resulting in a complicated model that is hard to train. In this paper, a neural sequence-to-sequence ASR is used as feature processing that is trained to produce word posterior features given spoken utterances. The resulting probabilistic features are used to train a neural MT (NMT) with only a slight modification. Experimental results reveal that the proposed method improved up to 5.8 BLEU scores with synthesized speech or 4.3 BLEU scores with the natural speech in comparison with a conventional cascaded-based ST system that translates from the 1-best ASR candidates.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="osamura-etal-2018-using">
<titleInfo>
<title>Using Spoken Word Posterior Features in Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kaho</namePart>
<namePart type="family">Osamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Takatomo</namePart>
<namePart type="family">Kano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katsuhito</namePart>
<namePart type="family">Sudoh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Satoshi</namePart>
<namePart type="family">Nakamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-oct 29-30</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Spoken Language Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Turchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Niehues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Frederico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Conference on Spoken Language Translation</publisher>
<place>
<placeTerm type="text">Brussels</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A spoken language translation (ST) system consists of at least two modules: an automatic speech recognition (ASR) system and a machine translation (MT) system. In most cases, an MT is only trained and optimized using error-free text data. If the ASR makes errors, the translation accuracy will be greatly reduced. Existing studies have shown that training MT systems with ASR parameters or word lattices can improve the translation quality. However, such an extension requires a large change in standard MT systems, resulting in a complicated model that is hard to train. In this paper, a neural sequence-to-sequence ASR is used as feature processing that is trained to produce word posterior features given spoken utterances. The resulting probabilistic features are used to train a neural MT (NMT) with only a slight modification. Experimental results reveal that the proposed method improved up to 5.8 BLEU scores with synthesized speech or 4.3 BLEU scores with the natural speech in comparison with a conventional cascaded-based ST system that translates from the 1-best ASR candidates.</abstract>
<identifier type="citekey">osamura-etal-2018-using</identifier>
<location>
<url>https://aclanthology.org/2018.iwslt-1.28</url>
</location>
<part>
<date>2018-oct 29-30</date>
<extent unit="page">
<start>189</start>
<end>195</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Using Spoken Word Posterior Features in Neural Machine Translation
%A Osamura, Kaho
%A Kano, Takatomo
%A Sakti, Sakriani
%A Sudoh, Katsuhito
%A Nakamura, Satoshi
%Y Turchi, Marco
%Y Niehues, Jan
%Y Frederico, Marcello
%S Proceedings of the 15th International Conference on Spoken Language Translation
%D 2018
%8 oct 29 30
%I International Conference on Spoken Language Translation
%C Brussels
%F osamura-etal-2018-using
%X A spoken language translation (ST) system consists of at least two modules: an automatic speech recognition (ASR) system and a machine translation (MT) system. In most cases, an MT is only trained and optimized using error-free text data. If the ASR makes errors, the translation accuracy will be greatly reduced. Existing studies have shown that training MT systems with ASR parameters or word lattices can improve the translation quality. However, such an extension requires a large change in standard MT systems, resulting in a complicated model that is hard to train. In this paper, a neural sequence-to-sequence ASR is used as feature processing that is trained to produce word posterior features given spoken utterances. The resulting probabilistic features are used to train a neural MT (NMT) with only a slight modification. Experimental results reveal that the proposed method improved up to 5.8 BLEU scores with synthesized speech or 4.3 BLEU scores with the natural speech in comparison with a conventional cascaded-based ST system that translates from the 1-best ASR candidates.
%U https://aclanthology.org/2018.iwslt-1.28
%P 189-195
Markdown (Informal)
[Using Spoken Word Posterior Features in Neural Machine Translation](https://aclanthology.org/2018.iwslt-1.28) (Osamura et al., IWSLT 2018)
ACL