@inproceedings{sugiyama-yoshinaga-2019-data,
    title = "Data augmentation using back-translation for context-aware neural machine translation",
    author = "Sugiyama, Amane and
      Yoshinaga, Naoki",
    editor = "Popescu-Belis, Andrei and
      Lo{\'a}iciga, Sharid and
      Hardmeier, Christian and
      Xiong, Deyi",
    booktitle = "Proceedings of the Fourth Workshop on Discourse in Machine Translation (DiscoMT 2019)",
    month = nov,
    year = "2019",
    address = "Hong Kong, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/D19-6504",
    doi = "10.18653/v1/D19-6504",
    pages = "35--44",
    abstract = "A single sentence does not always convey enough information to translate it into other languages. Some target languages need to add or specialize words that are omitted or ambiguous in the source languages (e.g., zero pronouns in translating Japanese to English or epicene pronouns in translating English to French). To translate such ambiguous sentences, we need context beyond a single sentence, and have so far explored context-aware neural machine translation (NMT). However, large parallel corpora are not easily available for training accurate context-aware NMT models. In this study, we first obtain large-scale pseudo parallel corpora by back-translating monolingual data, and then investigate their impact on the translation accuracy of context-aware NMT models. We evaluated context-aware NMT models trained with small parallel corpora and the large-scale pseudo parallel corpora on English-Japanese and English-French datasets to demonstrate the large impact of data augmentation on context-aware NMT models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sugiyama-yoshinaga-2019-data">
    <titleInfo>
      <title>Data augmentation using back-translation for context-aware neural machine translation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Amane</namePart>
      <namePart type="family">Sugiyama</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Naoki</namePart>
      <namePart type="family">Yoshinaga</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Fourth Workshop on Discourse in Machine Translation (DiscoMT 2019)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Andrei</namePart>
        <namePart type="family">Popescu-Belis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sharid</namePart>
        <namePart type="family">Loáiciga</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christian</namePart>
        <namePart type="family">Hardmeier</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Deyi</namePart>
        <namePart type="family">Xiong</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Hong Kong, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>A single sentence does not always convey enough information to translate it into other languages. Some target languages need to add or specialize words that are omitted or ambiguous in the source languages (e.g., zero pronouns in translating Japanese to English or epicene pronouns in translating English to French). To translate such ambiguous sentences, we need context beyond a single sentence, and have so far explored context-aware neural machine translation (NMT). However, large parallel corpora are not easily available for training accurate context-aware NMT models. In this study, we first obtain large-scale pseudo parallel corpora by back-translating monolingual data, and then investigate their impact on the translation accuracy of context-aware NMT models. We evaluated context-aware NMT models trained with small parallel corpora and the large-scale pseudo parallel corpora on English-Japanese and English-French datasets to demonstrate the large impact of data augmentation on context-aware NMT models.</abstract>
    <identifier type="citekey">sugiyama-yoshinaga-2019-data</identifier>
    <identifier type="doi">10.18653/v1/D19-6504</identifier>
    <location>
      <url>https://aclanthology.org/D19-6504</url>
    </location>
    <part>
      <date>2019-11</date>
      <extent unit="page">
        <start>35</start>
        <end>44</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Data augmentation using back-translation for context-aware neural machine translation
%A Sugiyama, Amane
%A Yoshinaga, Naoki
%Y Popescu-Belis, Andrei
%Y Loáiciga, Sharid
%Y Hardmeier, Christian
%Y Xiong, Deyi
%S Proceedings of the Fourth Workshop on Discourse in Machine Translation (DiscoMT 2019)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F sugiyama-yoshinaga-2019-data
%X A single sentence does not always convey enough information to translate it into other languages. Some target languages need to add or specialize words that are omitted or ambiguous in the source languages (e.g., zero pronouns in translating Japanese to English or epicene pronouns in translating English to French). To translate such ambiguous sentences, we need context beyond a single sentence, and have so far explored context-aware neural machine translation (NMT). However, large parallel corpora are not easily available for training accurate context-aware NMT models. In this study, we first obtain large-scale pseudo parallel corpora by back-translating monolingual data, and then investigate their impact on the translation accuracy of context-aware NMT models. We evaluated context-aware NMT models trained with small parallel corpora and the large-scale pseudo parallel corpora on English-Japanese and English-French datasets to demonstrate the large impact of data augmentation on context-aware NMT models.
%R 10.18653/v1/D19-6504
%U https://aclanthology.org/D19-6504
%U https://doi.org/10.18653/v1/D19-6504
%P 35-44
Markdown (Informal)
[Data augmentation using back-translation for context-aware neural machine translation](https://aclanthology.org/D19-6504) (Sugiyama & Yoshinaga, DiscoMT 2019)
ACL

Amane Sugiyama and Naoki Yoshinaga. 2019. [Data augmentation using back-translation for context-aware neural machine translation](https://aclanthology.org/D19-6504). In *Proceedings of the Fourth Workshop on Discourse in Machine Translation (DiscoMT 2019)*, pages 35–44, Hong Kong, China. Association for Computational Linguistics.
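
For readers who want more than the citation record, here is a minimal sketch of the back-translation data augmentation the abstract describes: target-side monolingual documents are translated back into the source language to form a pseudo parallel corpus for training context-aware NMT. `translate_target_to_source` and the one-previous-sentence context pairing are illustrative assumptions, not the paper's released code or exact architecture.

```python
# Minimal sketch of back-translation data augmentation for context-aware NMT.
# `translate_target_to_source` is a hypothetical stand-in for any trained
# target->source (reverse) NMT model; it is not part of the paper's code.

def translate_target_to_source(target_sentences):
    """Hypothetical reverse translator (e.g., an English->Japanese model)."""
    raise NotImplementedError("plug in any trained reverse NMT model here")

def back_translate(monolingual_target_docs):
    """Build a pseudo parallel corpus from target-side monolingual documents.

    Translating whole documents keeps sentence order intact, so each synthetic
    source sentence can be paired with preceding sentences as the
    cross-sentence context that context-aware NMT models consume.
    """
    pseudo_parallel = []
    for doc in monolingual_target_docs:  # doc: list of target-language sentences
        pseudo_sources = translate_target_to_source(doc)
        for i, (src, tgt) in enumerate(zip(pseudo_sources, doc)):
            # Assumed setup: the previous synthetic source sentence serves as
            # context; the first sentence of a document has none.
            context = pseudo_sources[i - 1] if i > 0 else ""
            pseudo_parallel.append({"context": context, "source": src, "target": tgt})
    return pseudo_parallel
```

The resulting triples can be mixed with a small genuine parallel corpus when training; the abstract's finding is that this augmentation has a large impact on context-aware NMT accuracy.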