@inproceedings{kvapilikova-bojar-2023-boosting,
title = "Boosting Unsupervised Machine Translation with Pseudo-Parallel Data",
author = "Kvapil{\'\i}kov{\'a}, Ivana and
Bojar, Ond{\v{r}}ej",
editor = "Utiyama, Masao and
Wang, Rui",
booktitle = "Proceedings of Machine Translation Summit XIX, Vol. 1: Research Track",
month = sep,
year = "2023",
address = "Macau SAR, China",
publisher = "Asia-Pacific Association for Machine Translation",
url = "https://aclanthology.org/2023.mtsummit-research.12",
pages = "135--147",
abstract = "Even with the latest developments in deep learning and large-scale language modeling, the task of machine translation (MT) of low-resource languages remains a challenge. Neural MT systems can be trained in an unsupervised way without any translation resources but the quality lags behind, especially in truly low-resource conditions. We propose a training strategy that relies on pseudo-parallel sentence pairs mined from monolingual corpora in addition to synthetic sentence pairs back-translated from monolingual corpora. We experiment with different training schedules and reach an improvement of up to 14.5 BLEU points (English to Ukrainian) over a baseline trained on back-translated data only.",
}
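The abstract describes two data ingredients: pseudo-parallel sentence pairs mined from monolingual corpora, and synthetic pairs produced by back-translation. The paper itself specifies the actual mining procedure; purely as an illustrative sketch, the snippet below implements margin-based bitext mining over multilingual sentence embeddings, a standard technique from the bitext-mining literature and not necessarily the authors' exact setup. The random vectors, `threshold`, and `k` are placeholder assumptions standing in for a real multilingual encoder and tuned hyperparameters.

```python
import numpy as np

def margin_scores(src_emb, tgt_emb, k=4):
    """Ratio-margin scores between all source/target embedding pairs.
    src_emb: (n_src, d), tgt_emb: (n_tgt, d); rows are L2-normalized,
    so the dot product is cosine similarity."""
    sim = src_emb @ tgt_emb.T                            # (n_src, n_tgt)
    # Mean similarity to each sentence's k nearest neighbours, used to
    # normalize away "hubness" in the embedding space.
    knn_src = np.sort(sim, axis=1)[:, -k:].mean(axis=1)  # (n_src,)
    knn_tgt = np.sort(sim, axis=0)[-k:, :].mean(axis=0)  # (n_tgt,)
    return sim / (0.5 * (knn_src[:, None] + knn_tgt[None, :]))

def mine_pseudo_parallel(src_emb, tgt_emb, threshold=1.06, k=4):
    """Return (src_idx, tgt_idx) index pairs whose margin score clears
    the threshold; these form the pseudo-parallel corpus."""
    scores = margin_scores(src_emb, tgt_emb, k)
    best_tgt = scores.argmax(axis=1)
    best_score = scores[np.arange(src_emb.shape[0]), best_tgt]
    keep = best_score >= threshold
    return np.flatnonzero(keep), best_tgt[keep]

# Toy usage: random normalized vectors stand in for embeddings from a
# real multilingual encoder; threshold and k are illustrative values,
# not hyperparameters taken from the paper.
rng = np.random.default_rng(0)
src = rng.normal(size=(100, 64)); src /= np.linalg.norm(src, axis=1, keepdims=True)
tgt = rng.normal(size=(120, 64)); tgt /= np.linalg.norm(tgt, axis=1, keepdims=True)
src_idx, tgt_idx = mine_pseudo_parallel(src, tgt)
print(f"mined {len(src_idx)} pseudo-parallel pairs")
```

In a training schedule like the one the abstract alludes to, the mined pairs would then be combined (concatenated or alternated) with back-translated synthetic pairs during NMT training; the paper compares several such schedules.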
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kvapilikova-bojar-2023-boosting">
    <titleInfo>
      <title>Boosting Unsupervised Machine Translation with Pseudo-Parallel Data</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Ivana</namePart>
      <namePart type="family">Kvapilíková</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ondřej</namePart>
      <namePart type="family">Bojar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of Machine Translation Summit XIX, Vol. 1: Research Track</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Masao</namePart>
        <namePart type="family">Utiyama</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rui</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Asia-Pacific Association for Machine Translation</publisher>
        <place>
          <placeTerm type="text">Macau SAR, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Even with the latest developments in deep learning and large-scale language modeling, the task of machine translation (MT) of low-resource languages remains a challenge. Neural MT systems can be trained in an unsupervised way without any translation resources but the quality lags behind, especially in truly low-resource conditions. We propose a training strategy that relies on pseudo-parallel sentence pairs mined from monolingual corpora in addition to synthetic sentence pairs back-translated from monolingual corpora. We experiment with different training schedules and reach an improvement of up to 14.5 BLEU points (English to Ukrainian) over a baseline trained on back-translated data only.</abstract>
    <identifier type="citekey">kvapilikova-bojar-2023-boosting</identifier>
    <location>
      <url>https://aclanthology.org/2023.mtsummit-research.12</url>
    </location>
    <part>
      <date>2023-09</date>
      <extent unit="page">
        <start>135</start>
        <end>147</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Boosting Unsupervised Machine Translation with Pseudo-Parallel Data
%A Kvapilíková, Ivana
%A Bojar, Ondřej
%Y Utiyama, Masao
%Y Wang, Rui
%S Proceedings of Machine Translation Summit XIX, Vol. 1: Research Track
%D 2023
%8 September
%I Asia-Pacific Association for Machine Translation
%C Macau SAR, China
%F kvapilikova-bojar-2023-boosting
%X Even with the latest developments in deep learning and large-scale language modeling, the task of machine translation (MT) of low-resource languages remains a challenge. Neural MT systems can be trained in an unsupervised way without any translation resources but the quality lags behind, especially in truly low-resource conditions. We propose a training strategy that relies on pseudo-parallel sentence pairs mined from monolingual corpora in addition to synthetic sentence pairs back-translated from monolingual corpora. We experiment with different training schedules and reach an improvement of up to 14.5 BLEU points (English to Ukrainian) over a baseline trained on back-translated data only.
%U https://aclanthology.org/2023.mtsummit-research.12
%P 135-147
Markdown (Informal)
[Boosting Unsupervised Machine Translation with Pseudo-Parallel Data](https://aclanthology.org/2023.mtsummit-research.12) (Kvapilíková & Bojar, MTSummit 2023)
ACL
Ivana Kvapilíková and Ondřej Bojar. 2023. Boosting Unsupervised Machine Translation with Pseudo-Parallel Data. In Proceedings of Machine Translation Summit XIX, Vol. 1: Research Track, pages 135–147, Macau SAR, China. Asia-Pacific Association for Machine Translation.