@inproceedings{mcnamee-duh-2023-extensive,
title = "An Extensive Exploration of Back-Translation in 60 Languages",
author = "McNamee, Paul and
Duh, Kevin",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.518",
doi = "10.18653/v1/2023.findings-acl.518",
pages = "8166--8183",
abstract = "Back-translation is a data augmentation technique that has been shown to improve model quality through the creation of synthetic training bitext. Early studies showed the promise of the technique and follow on studies have produced additional refinements. We have undertaken a broad investigation using back-translation to train models from 60 languages into English; the majority of these languages are considered moderate- or low-resource languages. We observed consistent gains, though compared to prior work we saw conspicuous gains in quite a number of lower-resourced languages. We analyzed differences in translations between baseline and back-translation models, and observed many indications of improved translation quality. Translation of both rare and common terms is improved, and these improvements occur despite the less natural synthetic source-language text used in training.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mcnamee-duh-2023-extensive">
<titleInfo>
<title>An Extensive Exploration of Back-Translation in 60 Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">McNamee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Back-translation is a data augmentation technique that has been shown to improve model quality through the creation of synthetic training bitext. Early studies showed the promise of the technique and follow on studies have produced additional refinements. We have undertaken a broad investigation using back-translation to train models from 60 languages into English; the majority of these languages are considered moderate- or low-resource languages. We observed consistent gains, though compared to prior work we saw conspicuous gains in quite a number of lower-resourced languages. We analyzed differences in translations between baseline and back-translation models, and observed many indications of improved translation quality. Translation of both rare and common terms is improved, and these improvements occur despite the less natural synthetic source-language text used in training.</abstract>
<identifier type="citekey">mcnamee-duh-2023-extensive</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.518</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.518</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>8166</start>
<end>8183</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Extensive Exploration of Back-Translation in 60 Languages
%A McNamee, Paul
%A Duh, Kevin
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F mcnamee-duh-2023-extensive
%X Back-translation is a data augmentation technique that has been shown to improve model quality through the creation of synthetic training bitext. Early studies showed the promise of the technique and follow on studies have produced additional refinements. We have undertaken a broad investigation using back-translation to train models from 60 languages into English; the majority of these languages are considered moderate- or low-resource languages. We observed consistent gains, though compared to prior work we saw conspicuous gains in quite a number of lower-resourced languages. We analyzed differences in translations between baseline and back-translation models, and observed many indications of improved translation quality. Translation of both rare and common terms is improved, and these improvements occur despite the less natural synthetic source-language text used in training.
%R 10.18653/v1/2023.findings-acl.518
%U https://aclanthology.org/2023.findings-acl.518
%U https://doi.org/10.18653/v1/2023.findings-acl.518
%P 8166-8183
Markdown (Informal)
[An Extensive Exploration of Back-Translation in 60 Languages](https://aclanthology.org/2023.findings-acl.518) (McNamee & Duh, Findings 2023)
ACL