@inproceedings{robinson-etal-2022-data,
title = "Data-adaptive Transfer Learning for Translation: A Case Study in {H}aitian and Jamaican",
author = "Robinson, Nathaniel and
Hogan, Cameron and
Fulda, Nancy and
Mortensen, David R.",
editor = "Ojha, Atul Kr. and
Liu, Chao-Hong and
Vylomova, Ekaterina and
Abbott, Jade and
Washington, Jonathan and
Oco, Nathaniel and
Pirinen, Tommi A and
Malykh, Valentin and
Logacheva, Varvara and
Zhao, Xiaobing",
booktitle = "Proceedings of the Fifth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2022)",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.loresmt-1.5",
pages = "35--42",
abstract = "Multilingual transfer techniques often improve low-resource machine translation (MT). Many of these techniques are applied without considering data characteristics. We show in the context of Haitian-to-English translation that transfer effectiveness is correlated with amount of training data and relationships between knowledge-sharing languages. Our experiments suggest that for some languages beyond a threshold of authentic data, back-translation augmentation methods are counterproductive, while cross-lingual transfer from a sufficiently related language is preferred. We complement this finding by contributing a rule-based French-Haitian orthographic and syntactic engine and a novel method for phonological embedding. When used with multilingual techniques, orthographic transformation makes statistically significant improvements over conventional methods. And in very low-resource Jamaican MT, code-switching with a transfer language for orthographic resemblance yields a 6.63 BLEU point advantage.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="robinson-etal-2022-data">
<titleInfo>
<title>Data-adaptive Transfer Learning for Translation: A Case Study in Haitian and Jamaican</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nathaniel</namePart>
<namePart type="family">Robinson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cameron</namePart>
<namePart type="family">Hogan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nancy</namePart>
<namePart type="family">Fulda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Mortensen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chao-Hong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jade</namePart>
<namePart type="family">Abbott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">Washington</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathaniel</namePart>
<namePart type="family">Oco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommi</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Pirinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Malykh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Varvara</namePart>
<namePart type="family">Logacheva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaobing</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Multilingual transfer techniques often improve low-resource machine translation (MT). Many of these techniques are applied without considering data characteristics. We show in the context of Haitian-to-English translation that transfer effectiveness is correlated with amount of training data and relationships between knowledge-sharing languages. Our experiments suggest that for some languages beyond a threshold of authentic data, back-translation augmentation methods are counterproductive, while cross-lingual transfer from a sufficiently related language is preferred. We complement this finding by contributing a rule-based French-Haitian orthographic and syntactic engine and a novel method for phonological embedding. When used with multilingual techniques, orthographic transformation makes statistically significant improvements over conventional methods. And in very low-resource Jamaican MT, code-switching with a transfer language for orthographic resemblance yields a 6.63 BLEU point advantage.</abstract>
<identifier type="citekey">robinson-etal-2022-data</identifier>
<location>
<url>https://aclanthology.org/2022.loresmt-1.5</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>35</start>
<end>42</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data-adaptive Transfer Learning for Translation: A Case Study in Haitian and Jamaican
%A Robinson, Nathaniel
%A Hogan, Cameron
%A Fulda, Nancy
%A Mortensen, David R.
%Y Ojha, Atul Kr.
%Y Liu, Chao-Hong
%Y Vylomova, Ekaterina
%Y Abbott, Jade
%Y Washington, Jonathan
%Y Oco, Nathaniel
%Y Pirinen, Tommi A.
%Y Malykh, Valentin
%Y Logacheva, Varvara
%Y Zhao, Xiaobing
%S Proceedings of the Fifth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2022)
%D 2022
%8 October
%I Association for Computational Linguistics
%C Gyeongju, Republic of Korea
%F robinson-etal-2022-data
%X Multilingual transfer techniques often improve low-resource machine translation (MT). Many of these techniques are applied without considering data characteristics. We show in the context of Haitian-to-English translation that transfer effectiveness is correlated with amount of training data and relationships between knowledge-sharing languages. Our experiments suggest that for some languages beyond a threshold of authentic data, back-translation augmentation methods are counterproductive, while cross-lingual transfer from a sufficiently related language is preferred. We complement this finding by contributing a rule-based French-Haitian orthographic and syntactic engine and a novel method for phonological embedding. When used with multilingual techniques, orthographic transformation makes statistically significant improvements over conventional methods. And in very low-resource Jamaican MT, code-switching with a transfer language for orthographic resemblance yields a 6.63 BLEU point advantage.
%U https://aclanthology.org/2022.loresmt-1.5
%P 35-42
Markdown (Informal)
[Data-adaptive Transfer Learning for Translation: A Case Study in Haitian and Jamaican](https://aclanthology.org/2022.loresmt-1.5) (Robinson et al., LoResMT 2022)
ACL