@inproceedings{poncelas-etal-2020-using,
title = "Using Multiple Subwords to Improve {E}nglish-{E}speranto Automated Literary Translation Quality",
author = "Poncelas, Alberto and
Buts, Jan and
Hadley, James and
Way, Andy",
editor = "Karakanta, Alina and
Ojha, Atul Kr. and
Liu, Chao-Hong and
Abbott, Jade and
Ortega, John and
Washington, Jonathan and
Oco, Nathaniel and
Lakew, Surafel Melaku and
Pirinen, Tommi A and
Malykh, Valentin and
Logacheva, Varvara and
Zhao, Xiaobing",
booktitle = "Proceedings of the 3rd Workshop on Technologies for MT of Low Resource Languages",
month = dec,
year = "2020",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.loresmt-1.14",
pages = "108--117",
abstract = "Building Machine Translation (MT) systems for low-resource languages remains challenging. For many language pairs, parallel data are not widely available, and in such cases MT models do not achieve results comparable to those seen with high-resource languages. When data are scarce, it is of paramount importance to make optimal use of the limited material available. To that end, in this paper we propose employing the same parallel sentences multiple times, only changing the way the words are split each time. For this purpose we use several Byte Pair Encoding models, with various merge operations used in their configuration. In our experiments, we use this technique to expand the available data and improve an MT system involving a low-resource language pair, namely English-Esperanto. As an additional contribution, we made available a set of English-Esperanto parallel data in the literary domain.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="poncelas-etal-2020-using">
<titleInfo>
<title>Using Multiple Subwords to Improve English-Esperanto Automated Literary Translation Quality</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Poncelas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Buts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Hadley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andy</namePart>
<namePart type="family">Way</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on Technologies for MT of Low Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alina</namePart>
<namePart type="family">Karakanta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chao-Hong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jade</namePart>
<namePart type="family">Abbott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Ortega</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">Washington</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathaniel</namePart>
<namePart type="family">Oco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surafel</namePart>
<namePart type="given">Melaku</namePart>
<namePart type="family">Lakew</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommi</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Pirinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Malykh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Varvara</namePart>
<namePart type="family">Logacheva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaobing</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Building Machine Translation (MT) systems for low-resource languages remains challenging. For many language pairs, parallel data are not widely available, and in such cases MT models do not achieve results comparable to those seen with high-resource languages. When data are scarce, it is of paramount importance to make optimal use of the limited material available. To that end, in this paper we propose employing the same parallel sentences multiple times, only changing the way the words are split each time. For this purpose we use several Byte Pair Encoding models, with various merge operations used in their configuration. In our experiments, we use this technique to expand the available data and improve an MT system involving a low-resource language pair, namely English-Esperanto. As an additional contribution, we made available a set of English-Esperanto parallel data in the literary domain.</abstract>
<identifier type="citekey">poncelas-etal-2020-using</identifier>
<location>
<url>https://aclanthology.org/2020.loresmt-1.14</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>108</start>
<end>117</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Using Multiple Subwords to Improve English-Esperanto Automated Literary Translation Quality
%A Poncelas, Alberto
%A Buts, Jan
%A Hadley, James
%A Way, Andy
%Y Karakanta, Alina
%Y Ojha, Atul Kr.
%Y Liu, Chao-Hong
%Y Abbott, Jade
%Y Ortega, John
%Y Washington, Jonathan
%Y Oco, Nathaniel
%Y Lakew, Surafel Melaku
%Y Pirinen, Tommi A.
%Y Malykh, Valentin
%Y Logacheva, Varvara
%Y Zhao, Xiaobing
%S Proceedings of the 3rd Workshop on Technologies for MT of Low Resource Languages
%D 2020
%8 December
%I Association for Computational Linguistics
%C Suzhou, China
%F poncelas-etal-2020-using
%X Building Machine Translation (MT) systems for low-resource languages remains challenging. For many language pairs, parallel data are not widely available, and in such cases MT models do not achieve results comparable to those seen with high-resource languages. When data are scarce, it is of paramount importance to make optimal use of the limited material available. To that end, in this paper we propose employing the same parallel sentences multiple times, only changing the way the words are split each time. For this purpose we use several Byte Pair Encoding models, with various merge operations used in their configuration. In our experiments, we use this technique to expand the available data and improve an MT system involving a low-resource language pair, namely English-Esperanto. As an additional contribution, we made available a set of English-Esperanto parallel data in the literary domain.
%U https://aclanthology.org/2020.loresmt-1.14
%P 108-117
Markdown (Informal)
[Using Multiple Subwords to Improve English-Esperanto Automated Literary Translation Quality](https://aclanthology.org/2020.loresmt-1.14) (Poncelas et al., LoResMT 2020)
ACL