@inproceedings{tars-etal-2021-extremely,
  title     = {Extremely low-resource machine translation for closely related languages},
  author    = {Tars, Maali and T{\"a}ttar, Andre and Fi{\v{s}}el, Mark},
  editor    = {Dobnik, Simon and {\O}vrelid, Lilja},
  booktitle = {Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)},
  month     = may # " 31--2 " # jun,
  year      = {2021},
  address   = {Reykjavik, Iceland (Online)},
  publisher = {Link{\"o}ping University Electronic Press, Sweden},
  url       = {https://aclanthology.org/2021.nodalida-main.5},
  pages     = {41--52},
  abstract  = {An effective method to improve extremely low-resource neural machine translation is multilingual training, which can be improved by leveraging monolingual data to create synthetic bilingual corpora using the back-translation method. This work focuses on closely related languages from the Uralic language family: from Estonian and Finnish geographical regions. We find that multilingual learning and synthetic corpora increase the translation quality in every language pair for which we have data. We show that transfer learning and fine-tuning are very effective for doing low-resource machine translation and achieve the best results. We collected new parallel data for V{\~o}ro, North and South Saami and present first results of neural machine translation for these languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tars-etal-2021-extremely">
<titleInfo>
<title>Extremely low-resource machine translation for closely related languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maali</namePart>
<namePart type="family">Tars</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Tättar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Fišel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Dobnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lilja</namePart>
<namePart type="family">Øvrelid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Linköping University Electronic Press, Sweden</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>An effective method to improve extremely low-resource neural machine translation is multilingual training, which can be improved by leveraging monolingual data to create synthetic bilingual corpora using the back-translation method. This work focuses on closely related languages from the Uralic language family: from Estonian and Finnish geographical regions. We find that multilingual learning and synthetic corpora increase the translation quality in every language pair for which we have data. We show that transfer learning and fine-tuning are very effective for doing low-resource machine translation and achieve the best results. We collected new parallel data for Võro, North and South Saami and present first results of neural machine translation for these languages.</abstract>
<identifier type="citekey">tars-etal-2021-extremely</identifier>
<location>
<url>https://aclanthology.org/2021.nodalida-main.5</url>
</location>
<part>
<date>2021-05</date>
<extent unit="page">
<start>41</start>
<end>52</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Extremely low-resource machine translation for closely related languages
%A Tars, Maali
%A Tättar, Andre
%A Fišel, Mark
%Y Dobnik, Simon
%Y Øvrelid, Lilja
%S Proceedings of the 23rd Nordic Conference on Computational Linguistics (NoDaLiDa)
%D 2021
%8 may 31–2 jun
%I Linköping University Electronic Press, Sweden
%C Reykjavik, Iceland (Online)
%F tars-etal-2021-extremely
%X An effective method to improve extremely low-resource neural machine translation is multilingual training, which can be improved by leveraging monolingual data to create synthetic bilingual corpora using the back-translation method. This work focuses on closely related languages from the Uralic language family: from Estonian and Finnish geographical regions. We find that multilingual learning and synthetic corpora increase the translation quality in every language pair for which we have data. We show that transfer learning and fine-tuning are very effective for doing low-resource machine translation and achieve the best results. We collected new parallel data for Võro, North and South Saami and present first results of neural machine translation for these languages.
%U https://aclanthology.org/2021.nodalida-main.5
%P 41-52
Markdown (Informal)
[Extremely low-resource machine translation for closely related languages](https://aclanthology.org/2021.nodalida-main.5) (Tars et al., NoDaLiDa 2021)
ACL