@inproceedings{feldman-coto-solano-2020-neural,
title = "Neural Machine Translation Models with Back-Translation for the Extremely Low-Resource Indigenous Language {B}ribri",
author = "Feldman, Isaac and
Coto-Solano, Rolando",
editor = "Scott, Donia and
Bel, Nuria and
Zong, Chengqing",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-main.351",
doi = "10.18653/v1/2020.coling-main.351",
pages = "3965--3976",
abstract = "This paper presents a neural machine translation model and dataset for the Chibchan language Bribri, with an average performance of BLEU 16.9{\mbox{$\pm$}}1.7. This was trained on an extremely small dataset (5923 Bribri-Spanish pairs), providing evidence for the applicability of NMT in extremely low-resource environments. We discuss the challenges entailed in managing training input from languages without standard orthographies, we provide evidence of successful learning of Bribri grammar, and also examine the translations of structures that are infrequent in major Indo-European languages, such as positional verbs, ergative markers, numerical classifiers and complex demonstrative systems. In addition to this, we perform an experiment of augmenting the dataset through iterative back-translation (Sennrich et al., 2016a; Hoang et al., 2018) by using Spanish sentences to create synthetic Bribri sentences. This improves the score by an average of 1.0 BLEU, but only when the new Spanish sentences belong to the same domain as the other Spanish examples. This contributes to the small but growing body of research on Chibchan NLP.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="feldman-coto-solano-2020-neural">
    <titleInfo>
      <title>Neural Machine Translation Models with Back-Translation for the Extremely Low-Resource Indigenous Language Bribri</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Isaac</namePart>
      <namePart type="family">Feldman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rolando</namePart>
      <namePart type="family">Coto-Solano</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 28th International Conference on Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Donia</namePart>
        <namePart type="family">Scott</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nuria</namePart>
        <namePart type="family">Bel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chengqing</namePart>
        <namePart type="family">Zong</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>International Committee on Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper presents a neural machine translation model and dataset for the Chibchan language Bribri, with an average performance of BLEU 16.9±1.7. This was trained on an extremely small dataset (5923 Bribri-Spanish pairs), providing evidence for the applicability of NMT in extremely low-resource environments. We discuss the challenges entailed in managing training input from languages without standard orthographies, we provide evidence of successful learning of Bribri grammar, and also examine the translations of structures that are infrequent in major Indo-European languages, such as positional verbs, ergative markers, numerical classifiers and complex demonstrative systems. In addition to this, we perform an experiment of augmenting the dataset through iterative back-translation (Sennrich et al., 2016a; Hoang et al., 2018) by using Spanish sentences to create synthetic Bribri sentences. This improves the score by an average of 1.0 BLEU, but only when the new Spanish sentences belong to the same domain as the other Spanish examples. This contributes to the small but growing body of research on Chibchan NLP.</abstract>
    <identifier type="citekey">feldman-coto-solano-2020-neural</identifier>
    <identifier type="doi">10.18653/v1/2020.coling-main.351</identifier>
    <location>
      <url>https://aclanthology.org/2020.coling-main.351</url>
    </location>
    <part>
      <date>2020-12</date>
      <extent unit="page">
        <start>3965</start>
        <end>3976</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Neural Machine Translation Models with Back-Translation for the Extremely Low-Resource Indigenous Language Bribri
%A Feldman, Isaac
%A Coto-Solano, Rolando
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F feldman-coto-solano-2020-neural
%X This paper presents a neural machine translation model and dataset for the Chibchan language Bribri, with an average performance of BLEU 16.9±1.7. This was trained on an extremely small dataset (5923 Bribri-Spanish pairs), providing evidence for the applicability of NMT in extremely low-resource environments. We discuss the challenges entailed in managing training input from languages without standard orthographies, we provide evidence of successful learning of Bribri grammar, and also examine the translations of structures that are infrequent in major Indo-European languages, such as positional verbs, ergative markers, numerical classifiers and complex demonstrative systems. In addition to this, we perform an experiment of augmenting the dataset through iterative back-translation (Sennrich et al., 2016a; Hoang et al., 2018) by using Spanish sentences to create synthetic Bribri sentences. This improves the score by an average of 1.0 BLEU, but only when the new Spanish sentences belong to the same domain as the other Spanish examples. This contributes to the small but growing body of research on Chibchan NLP.
%R 10.18653/v1/2020.coling-main.351
%U https://aclanthology.org/2020.coling-main.351
%U https://doi.org/10.18653/v1/2020.coling-main.351
%P 3965-3976
Markdown (Informal)
[Neural Machine Translation Models with Back-Translation for the Extremely Low-Resource Indigenous Language Bribri](https://aclanthology.org/2020.coling-main.351) (Feldman & Coto-Solano, COLING 2020)
ACL
Isaac Feldman and Rolando Coto-Solano. 2020. [Neural Machine Translation Models with Back-Translation for the Extremely Low-Resource Indigenous Language Bribri](https://aclanthology.org/2020.coling-main.351). In *Proceedings of the 28th International Conference on Computational Linguistics*, pages 3965–3976, Barcelona, Spain (Online). International Committee on Computational Linguistics.