@inproceedings{caswell-etal-2019-tagged,
title = "Tagged Back-Translation",
author = "Caswell, Isaac and
Chelba, Ciprian and
Grangier, David",
booktitle = "Proceedings of the Fourth Conference on Machine Translation (Volume 1: Research Papers)",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-5206",
doi = "10.18653/v1/W19-5206",
pages = "53--63",
abstract = "Recent work in Neural Machine Translation (NMT) has shown significant quality gains from noised-beam decoding during back-translation, a method to generate synthetic parallel data. We show that the main role of such synthetic noise is not to diversify the source side, as previously suggested, but simply to indicate to the model that the given source is synthetic. We propose a simpler alternative to noising techniques, consisting of tagging back-translated source sentences with an extra token. Our results on WMT outperform noised back-translation in English-Romanian and match performance on English-German, redefining the state-of-the-art on the former.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="caswell-etal-2019-tagged">
<titleInfo>
<title>Tagged Back-Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Isaac</namePart>
<namePart type="family">Caswell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ciprian</namePart>
<namePart type="family">Chelba</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Grangier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Conference on Machine Translation (Volume 1: Research Papers)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent work in Neural Machine Translation (NMT) has shown significant quality gains from noised-beam decoding during back-translation, a method to generate synthetic parallel data. We show that the main role of such synthetic noise is not to diversify the source side, as previously suggested, but simply to indicate to the model that the given source is synthetic. We propose a simpler alternative to noising techniques, consisting of tagging back-translated source sentences with an extra token. Our results on WMT outperform noised back-translation in English-Romanian and match performance on English-German, redefining the state-of-the-art on the former.</abstract>
<identifier type="citekey">caswell-etal-2019-tagged</identifier>
<identifier type="doi">10.18653/v1/W19-5206</identifier>
<location>
<url>https://aclanthology.org/W19-5206</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>53</start>
<end>63</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tagged Back-Translation
%A Caswell, Isaac
%A Chelba, Ciprian
%A Grangier, David
%S Proceedings of the Fourth Conference on Machine Translation (Volume 1: Research Papers)
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F caswell-etal-2019-tagged
%X Recent work in Neural Machine Translation (NMT) has shown significant quality gains from noised-beam decoding during back-translation, a method to generate synthetic parallel data. We show that the main role of such synthetic noise is not to diversify the source side, as previously suggested, but simply to indicate to the model that the given source is synthetic. We propose a simpler alternative to noising techniques, consisting of tagging back-translated source sentences with an extra token. Our results on WMT outperform noised back-translation in English-Romanian and match performance on English-German, redefining the state-of-the-art on the former.
%R 10.18653/v1/W19-5206
%U https://aclanthology.org/W19-5206
%U https://doi.org/10.18653/v1/W19-5206
%P 53-63
Markdown (Informal)
[Tagged Back-Translation](https://aclanthology.org/W19-5206) (Caswell et al., WMT 2019)
ACL
- Isaac Caswell, Ciprian Chelba, and David Grangier. 2019. Tagged Back-Translation. In Proceedings of the Fourth Conference on Machine Translation (Volume 1: Research Papers), pages 53–63, Florence, Italy. Association for Computational Linguistics.