@inproceedings{chen-fazio-2021-morphologically,
title = "Morphologically-Guided Segmentation For Translation of Agglutinative Low-Resource Languages",
author = "Chen, William and
Fazio, Brett",
editor = "Ortega, John and
Ojha, Atul Kr. and
Kann, Katharina and
Liu, Chao-Hong",
booktitle = "Proceedings of the 4th Workshop on Technologies for MT of Low Resource Languages (LoResMT2021)",
month = aug,
year = "2021",
address = "Virtual",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2021.mtsummit-loresmt.3",
pages = "20--31",
abstract = "Neural Machine Translation (NMT) for Low Resource Languages (LRL) is often limited by the lack of available training data, making it necessary to explore additional techniques to improve translation quality. We propose the use of the Prefix-Root-Postfix-Encoding (PRPE) subword segmentation algorithm to improve translation quality for LRLs, using two agglutinative languages as case studies: Quechua and Indonesian. During the course of our experiments, we reintroduce a parallel corpus for Quechua-Spanish translation that was previously unavailable for NMT. Our experiments show the importance of appropriate subword segmentation, which can go as far as improving translation quality over systems trained on much larger quantities of data. We show this by achieving state-of-the-art results for both languages, obtaining higher BLEU scores than large pre-trained models with much smaller amounts of data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-fazio-2021-morphologically">
<titleInfo>
<title>Morphologically-Guided Segmentation For Translation of Agglutinative Low-Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brett</namePart>
<namePart type="family">Fazio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Technologies for MT of Low Resource Languages (LoResMT2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Ortega</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Kann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chao-Hong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Virtual</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Neural Machine Translation (NMT) for Low Resource Languages (LRL) is often limited by the lack of available training data, making it necessary to explore additional techniques to improve translation quality. We propose the use of the Prefix-Root-Postfix-Encoding (PRPE) subword segmentation algorithm to improve translation quality for LRLs, using two agglutinative languages as case studies: Quechua and Indonesian. During the course of our experiments, we reintroduce a parallel corpus for Quechua-Spanish translation that was previously unavailable for NMT. Our experiments show the importance of appropriate subword segmentation, which can go as far as improving translation quality over systems trained on much larger quantities of data. We show this by achieving state-of-the-art results for both languages, obtaining higher BLEU scores than large pre-trained models with much smaller amounts of data.</abstract>
<identifier type="citekey">chen-fazio-2021-morphologically</identifier>
<location>
<url>https://aclanthology.org/2021.mtsummit-loresmt.3</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>20</start>
<end>31</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Morphologically-Guided Segmentation For Translation of Agglutinative Low-Resource Languages
%A Chen, William
%A Fazio, Brett
%Y Ortega, John
%Y Ojha, Atul Kr.
%Y Kann, Katharina
%Y Liu, Chao-Hong
%S Proceedings of the 4th Workshop on Technologies for MT of Low Resource Languages (LoResMT2021)
%D 2021
%8 August
%I Association for Machine Translation in the Americas
%C Virtual
%F chen-fazio-2021-morphologically
%X Neural Machine Translation (NMT) for Low Resource Languages (LRL) is often limited by the lack of available training data, making it necessary to explore additional techniques to improve translation quality. We propose the use of the Prefix-Root-Postfix-Encoding (PRPE) subword segmentation algorithm to improve translation quality for LRLs, using two agglutinative languages as case studies: Quechua and Indonesian. During the course of our experiments, we reintroduce a parallel corpus for Quechua-Spanish translation that was previously unavailable for NMT. Our experiments show the importance of appropriate subword segmentation, which can go as far as improving translation quality over systems trained on much larger quantities of data. We show this by achieving state-of-the-art results for both languages, obtaining higher BLEU scores than large pre-trained models with much smaller amounts of data.
%U https://aclanthology.org/2021.mtsummit-loresmt.3
%P 20-31
Markdown (Informal)
[Morphologically-Guided Segmentation For Translation of Agglutinative Low-Resource Languages](https://aclanthology.org/2021.mtsummit-loresmt.3) (Chen & Fazio, LoResMT 2021)
ACL