BibTeX
@inproceedings{zheng-etal-2021-low,
title = "Low-Resource Machine Translation Using Cross-Lingual Language Model Pretraining",
author = "Zheng, Francis and
Reid, Machel and
Marrese-Taylor, Edison and
Matsuo, Yutaka",
booktitle = "Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.americasnlp-1.26",
doi = "10.18653/v1/2021.americasnlp-1.26",
pages = "234--240",
abstract = {This paper describes UTokyo{'}s submission to the AmericasNLP 2021 Shared Task on machine translation systems for indigenous languages of the Americas. We present a low-resource machine translation system that improves translation accuracy using cross-lingual language model pretraining. Our system uses an mBART implementation of fairseq to pretrain on a large set of monolingual data from a diverse set of high-resource languages before finetuning on 10 low-resource indigenous American languages: Aymara, Bribri, Ash{\'a}ninka, Guaran{\'\i}, Wixarika, N{\'a}huatl, H{\~n}{\"a}h{\~n}u, Quechua, Shipibo-Konibo, and Rar{\'a}muri. On average, our system achieved BLEU scores that were 1.64 higher and chrF scores that were 0.0749 higher than the baseline.},
}

MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zheng-etal-2021-low">
<titleInfo>
<title>Low-Resource Machine Translation Using Cross-Lingual Language Model Pretraining</title>
</titleInfo>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Machel</namePart>
<namePart type="family">Reid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edison</namePart>
<namePart type="family">Marrese-Taylor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yutaka</namePart>
<namePart type="family">Matsuo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes UTokyo’s submission to the AmericasNLP 2021 Shared Task on machine translation systems for indigenous languages of the Americas. We present a low-resource machine translation system that improves translation accuracy using cross-lingual language model pretraining. Our system uses an mBART implementation of fairseq to pretrain on a large set of monolingual data from a diverse set of high-resource languages before finetuning on 10 low-resource indigenous American languages: Aymara, Bribri, Asháninka, Guaraní, Wixarika, Náhuatl, Hñähñu, Quechua, Shipibo-Konibo, and Rarámuri. On average, our system achieved BLEU scores that were 1.64 higher and chrF scores that were 0.0749 higher than the baseline.</abstract>
<identifier type="citekey">zheng-etal-2021-low</identifier>
<identifier type="doi">10.18653/v1/2021.americasnlp-1.26</identifier>
<location>
<url>https://aclanthology.org/2021.americasnlp-1.26</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>234</start>
<end>240</end>
</extent>
</part>
</mods>
</modsCollection>

Endnote
%0 Conference Proceedings
%T Low-Resource Machine Translation Using Cross-Lingual Language Model Pretraining
%A Zheng, Francis
%A Reid, Machel
%A Marrese-Taylor, Edison
%A Matsuo, Yutaka
%S Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F zheng-etal-2021-low
%X This paper describes UTokyo’s submission to the AmericasNLP 2021 Shared Task on machine translation systems for indigenous languages of the Americas. We present a low-resource machine translation system that improves translation accuracy using cross-lingual language model pretraining. Our system uses an mBART implementation of fairseq to pretrain on a large set of monolingual data from a diverse set of high-resource languages before finetuning on 10 low-resource indigenous American languages: Aymara, Bribri, Asháninka, Guaraní, Wixarika, Náhuatl, Hñähñu, Quechua, Shipibo-Konibo, and Rarámuri. On average, our system achieved BLEU scores that were 1.64 higher and chrF scores that were 0.0749 higher than the baseline.
%R 10.18653/v1/2021.americasnlp-1.26
%U https://aclanthology.org/2021.americasnlp-1.26
%U https://doi.org/10.18653/v1/2021.americasnlp-1.26
%P 234-240

Markdown (Informal)
[Low-Resource Machine Translation Using Cross-Lingual Language Model Pretraining](https://aclanthology.org/2021.americasnlp-1.26) (Zheng et al., AmericasNLP 2021)

ACL
Francis Zheng, Machel Reid, Edison Marrese-Taylor, and Yutaka Matsuo. 2021. Low-Resource Machine Translation Using Cross-Lingual Language Model Pretraining. In Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas, pages 234–240, Online. Association for Computational Linguistics.
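
The abstract describes pretraining an mBART model with fairseq on high-resource monolingual data and then finetuning on the indigenous-language pairs. As a rough illustration only (not the authors' released code), the Python sketch below launches fairseq-train with the stock mBART finetuning flags from fairseq; the data directory, checkpoint path, language tags, and hyperparameter values are placeholder assumptions, not values reported in the paper.

```python
# Minimal sketch: finetune an mBART-style checkpoint with fairseq's CLI.
# All paths, language tags, and hyperparameters below are illustrative
# assumptions; they are not taken from the paper.
import subprocess

DATA_DIR = "data-bin/es-quy"                                   # hypothetical binarized parallel data
PRETRAINED = "checkpoints/mbart_pretrain/checkpoint_best.pt"   # hypothetical pretrained checkpoint
LANGS = "es_XX,quy_XX"                                         # hypothetical language tags from pretraining

subprocess.run([
    "fairseq-train", DATA_DIR,
    "--arch", "mbart_large",
    "--task", "translation_from_pretrained_bart",
    "--langs", LANGS,
    "--source-lang", "es_XX", "--target-lang", "quy_XX",
    "--encoder-normalize-before", "--decoder-normalize-before", "--layernorm-embedding",
    "--criterion", "label_smoothed_cross_entropy", "--label-smoothing", "0.2",
    "--optimizer", "adam", "--adam-betas", "(0.9, 0.98)", "--adam-eps", "1e-06",
    "--lr-scheduler", "polynomial_decay", "--lr", "3e-05",
    "--warmup-updates", "2500", "--total-num-update", "40000",
    "--dropout", "0.3", "--attention-dropout", "0.1",
    "--max-tokens", "1024", "--update-freq", "2",
    "--restore-file", PRETRAINED,                               # start from the pretrained weights
    "--reset-optimizer", "--reset-meters", "--reset-dataloader", "--reset-lr-scheduler",
], check=True)
```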