@inproceedings{ren-etal-2020-retrieve,
title = "A Retrieve-and-Rewrite Initialization Method for Unsupervised Machine Translation",
author = "Ren, Shuo and
Wu, Yu and
Liu, Shujie and
Zhou, Ming and
Ma, Shuai",
editor = "Jurafsky, Dan and
Chai, Joyce and
Schluter, Natalie and
Tetreault, Joel",
booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.acl-main.320",
doi = "10.18653/v1/2020.acl-main.320",
pages = "3498--3504",
abstract = "The commonly used framework for unsupervised machine translation builds initial translation models of both translation directions, and then performs iterative back-translation to jointly boost their translation performance. The initialization stage is very important since bad initialization may wrongly squeeze the search space, and too much noise introduced in this stage may hurt the final performance. In this paper, we propose a novel retrieval and rewriting based method to better initialize unsupervised translation models. We first retrieve semantically comparable sentences from monolingual corpora of two languages and then rewrite the target side to minimize the semantic gap between the source and retrieved targets with a designed rewriting model. The rewritten sentence pairs are used to initialize SMT models which are used to generate pseudo data for two NMT models, followed by the iterative back-translation. Experiments show that our method can build better initial unsupervised translation models and improve the final translation performance by over 4 BLEU scores. Our code is released at \url{https://github.com/Imagist-Shuo/RRforUNMT.git}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ren-etal-2020-retrieve">
<titleInfo>
<title>A Retrieve-and-Rewrite Initialization Method for Unsupervised Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shuo</namePart>
<namePart type="family">Ren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shujie</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ming</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuai</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Jurafsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Chai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalie</namePart>
<namePart type="family">Schluter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Tetreault</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The commonly used framework for unsupervised machine translation builds initial translation models of both translation directions, and then performs iterative back-translation to jointly boost their translation performance. The initialization stage is very important since bad initialization may wrongly squeeze the search space, and too much noise introduced in this stage may hurt the final performance. In this paper, we propose a novel retrieval and rewriting based method to better initialize unsupervised translation models. We first retrieve semantically comparable sentences from monolingual corpora of two languages and then rewrite the target side to minimize the semantic gap between the source and retrieved targets with a designed rewriting model. The rewritten sentence pairs are used to initialize SMT models which are used to generate pseudo data for two NMT models, followed by the iterative back-translation. Experiments show that our method can build better initial unsupervised translation models and improve the final translation performance by over 4 BLEU scores. Our code is released at https://github.com/Imagist-Shuo/RRforUNMT.git.</abstract>
<identifier type="citekey">ren-etal-2020-retrieve</identifier>
<identifier type="doi">10.18653/v1/2020.acl-main.320</identifier>
<location>
<url>https://aclanthology.org/2020.acl-main.320</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>3498</start>
<end>3504</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Retrieve-and-Rewrite Initialization Method for Unsupervised Machine Translation
%A Ren, Shuo
%A Wu, Yu
%A Liu, Shujie
%A Zhou, Ming
%A Ma, Shuai
%Y Jurafsky, Dan
%Y Chai, Joyce
%Y Schluter, Natalie
%Y Tetreault, Joel
%S Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F ren-etal-2020-retrieve
%X The commonly used framework for unsupervised machine translation builds initial translation models of both translation directions, and then performs iterative back-translation to jointly boost their translation performance. The initialization stage is very important since bad initialization may wrongly squeeze the search space, and too much noise introduced in this stage may hurt the final performance. In this paper, we propose a novel retrieval and rewriting based method to better initialize unsupervised translation models. We first retrieve semantically comparable sentences from monolingual corpora of two languages and then rewrite the target side to minimize the semantic gap between the source and retrieved targets with a designed rewriting model. The rewritten sentence pairs are used to initialize SMT models which are used to generate pseudo data for two NMT models, followed by the iterative back-translation. Experiments show that our method can build better initial unsupervised translation models and improve the final translation performance by over 4 BLEU scores. Our code is released at https://github.com/Imagist-Shuo/RRforUNMT.git.
%R 10.18653/v1/2020.acl-main.320
%U https://aclanthology.org/2020.acl-main.320
%U https://doi.org/10.18653/v1/2020.acl-main.320
%P 3498-3504
Markdown (Informal)
[A Retrieve-and-Rewrite Initialization Method for Unsupervised Machine Translation](https://aclanthology.org/2020.acl-main.320) (Ren et al., ACL 2020)
ACL