@inproceedings{poncelas-etal-2019-combining,
    title = "Combining {PBSMT} and {NMT} Back-translated Data for Efficient {NMT}",
    author = "Poncelas, Alberto  and
      Popovi{\'c}, Maja  and
      Shterionov, Dimitar  and
      Maillette de Buy Wenniger, Gideon  and
      Way, Andy",
    editor = "Mitkov, Ruslan  and
      Angelova, Galia",
    booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)",
    month = sep,
    year = "2019",
    address = "Varna, Bulgaria",
    publisher = "INCOMA Ltd.",
    url = "https://aclanthology.org/R19-1107",
    doi = "10.26615/978-954-452-056-4_107",
    pages = "922--931",
    abstract = "Neural Machine Translation (NMT) models achieve their best performance when large sets of parallel data are used for training. Consequently, techniques for augmenting the training set have become popular recently. One of these methods is back-translation, which consists of generating synthetic sentences by translating a set of monolingual, target-language sentences using a Machine Translation (MT) model. Generally, NMT models are used for back-translation. In this work, we analyze the performance of models when the training data is extended with synthetic data using different MT approaches. In particular, we investigate back-translated data generated not only by NMT but also by Statistical Machine Translation (SMT) models and combinations of both. The results reveal that the models achieve the best performance when the training set is augmented with back-translated data created by merging different MT approaches.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="poncelas-etal-2019-combining">
    <titleInfo>
      <title>Combining PBSMT and NMT Back-translated Data for Efficient NMT</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Alberto</namePart>
      <namePart type="family">Poncelas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Maja</namePart>
      <namePart type="family">Popović</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dimitar</namePart>
      <namePart type="family">Shterionov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Gideon</namePart>
      <namePart type="family">Maillette de Buy Wenniger</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andy</namePart>
      <namePart type="family">Way</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Neural Machine Translation (NMT) models achieve their best performance when large sets of parallel data are used for training. Consequently, techniques for augmenting the training set have become popular recently. One of these methods is back-translation, which consists of generating synthetic sentences by translating a set of monolingual, target-language sentences using a Machine Translation (MT) model. Generally, NMT models are used for back-translation. In this work, we analyze the performance of models when the training data is extended with synthetic data using different MT approaches. In particular, we investigate back-translated data generated not only by NMT but also by Statistical Machine Translation (SMT) models and combinations of both. The results reveal that the models achieve the best performance when the training set is augmented with back-translated data created by merging different MT approaches.</abstract>
    <identifier type="citekey">poncelas-etal-2019-combining</identifier>
    <identifier type="doi">10.26615/978-954-452-056-4_107</identifier>
    <location>
      <url>https://aclanthology.org/R19-1107</url>
    </location>
    <part>
      <date>2019-09</date>
      <extent unit="page">
        <start>922</start>
        <end>931</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Combining PBSMT and NMT Back-translated Data for Efficient NMT
%A Poncelas, Alberto
%A Popović, Maja
%A Shterionov, Dimitar
%A Maillette de Buy Wenniger, Gideon
%A Way, Andy
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)
%D 2019
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F poncelas-etal-2019-combining
%X Neural Machine Translation (NMT) models achieve their best performance when large sets of parallel data are used for training. Consequently, techniques for augmenting the training set have become popular recently. One of these methods is back-translation, which consists of generating synthetic sentences by translating a set of monolingual, target-language sentences using a Machine Translation (MT) model. Generally, NMT models are used for back-translation. In this work, we analyze the performance of models when the training data is extended with synthetic data using different MT approaches. In particular, we investigate back-translated data generated not only by NMT but also by Statistical Machine Translation (SMT) models and combinations of both. The results reveal that the models achieve the best performance when the training set is augmented with back-translated data created by merging different MT approaches.
%R 10.26615/978-954-452-056-4_107
%U https://aclanthology.org/R19-1107
%U https://doi.org/10.26615/978-954-452-056-4_107
%P 922-931
Markdown (Informal)
[Combining PBSMT and NMT Back-translated Data for Efficient NMT](https://aclanthology.org/R19-1107) (Poncelas et al., RANLP 2019)
ACL
Alberto Poncelas, Maja Popović, Dimitar Shterionov, Gideon Maillette de Buy Wenniger, and Andy Way. 2019. Combining PBSMT and NMT Back-translated Data for Efficient NMT. In Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019), pages 922–931, Varna, Bulgaria. INCOMA Ltd.
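
For readers unfamiliar with the technique the abstract describes, here is a minimal back-translation sketch. It is illustrative only, not the paper's pipeline: it assumes the Hugging Face transformers package and a single pretrained MarianMT model (Helsinki-NLP/opus-mt-de-en, an arbitrary choice), whereas the paper compares PBSMT, NMT, and hybrid systems for this step. The setup shown trains an English-to-German system, so the monolingual target-language data is German and the back-translation model runs in the reverse (German-to-English) direction.

```python
# Back-translation sketch (illustrative assumptions, not the paper's setup):
# translate monolingual target-language (German) sentences back into the
# source language (English) with a pretrained reverse-direction MT model,
# then pair each synthetic source with its authentic target.
from transformers import MarianMTModel, MarianTokenizer

# Assumed model choice; the paper instead generates this data with PBSMT,
# NMT, and combined systems.
model_name = "Helsinki-NLP/opus-mt-de-en"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# Monolingual target-language sentences (toy examples).
monolingual_de = [
    "Das Wetter ist heute schön.",
    "Maschinelle Übersetzung ist nützlich.",
]

# Back-translate: German -> synthetic English.
batch = tokenizer(monolingual_de, return_tensors="pt", padding=True)
generated = model.generate(**batch)
synthetic_en = tokenizer.batch_decode(generated, skip_special_tokens=True)

# (synthetic source, authentic target) pairs extend the parallel training set.
synthetic_parallel = list(zip(synthetic_en, monolingual_de))
```

Per the abstract, the best results come from merging back-translated data produced by different MT approaches, so in practice this step would be run with more than one system (e.g., a PBSMT and an NMT back-translator) and the resulting synthetic corpora combined.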