@inproceedings{alam-anastasopoulos-2020-fine,
title = "Fine-Tuning {MT} systems for Robustness to Second-Language Speaker Variations",
author = "Alam, Md Mahfuz Ibn and
Anastasopoulos, Antonios",
editor = "Xu, Wei and
Ritter, Alan and
Baldwin, Tim and
Rahimi, Afshin",
booktitle = "Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wnut-1.20",
doi = "10.18653/v1/2020.wnut-1.20",
pages = "149--158",
abstract = "The performance of neural machine translation (NMT) systems only trained on a single language variant degrades when confronted with even slightly different language variations. With this work, we build upon previous work to explore how to mitigate this issue. We show that fine-tuning using naturally occurring noise along with pseudo-references (i.e. {``}corrected{''} non-native inputs translated using the baseline NMT system) is a promising solution towards systems robust to such type of input variations. We focus on four translation pairs, from English to Spanish, Italian, French, and Portuguese, with our system achieving improvements of up to 3.1 BLEU points compared to the baselines, establishing a new state-of-the-art on the JFLEG-ES dataset. All datasets and code are publicly available here: \url{https://github.com/mahfuzibnalam/finetuning_for_robustness} .",
}
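As context for the abstract above: the paper's core recipe pairs noisy (non-native) source sentences with pseudo-references, i.e. baseline-NMT translations of the corrected sources, and fine-tunes on those pairs. Below is a minimal, hypothetical Python sketch of that data-construction step; the function name, data layout, and the toy baseline_translate stand-in are illustrative assumptions, not taken from the authors' released code.

# Sketch of pseudo-reference construction as described in the abstract:
# corrected non-native inputs are translated with the baseline NMT system,
# and the resulting outputs serve as references for the original (noisy)
# inputs during fine-tuning. `baseline_translate` is a hypothetical
# stand-in for any baseline NMT decoding function.

from typing import Callable, List, Tuple


def build_pseudo_reference_pairs(
    noisy_sources: List[str],
    corrected_sources: List[str],
    baseline_translate: Callable[[str], str],
) -> List[Tuple[str, str]]:
    """Pair each noisy (non-native) source with a pseudo-reference,
    i.e. the baseline system's translation of its corrected form."""
    pairs = []
    for noisy, corrected in zip(noisy_sources, corrected_sources):
        pseudo_reference = baseline_translate(corrected)
        pairs.append((noisy, pseudo_reference))
    return pairs


if __name__ == "__main__":
    # Toy stand-in for a baseline English->Spanish NMT system.
    toy_mt = {"He goes to school every day.": "Él va a la escuela todos los días."}
    pairs = build_pseudo_reference_pairs(
        noisy_sources=["He go to school every day."],
        corrected_sources=["He goes to school every day."],
        baseline_translate=lambda s: toy_mt.get(s, s),
    )
    print(pairs)  # fine-tuning data: (noisy input, pseudo-reference)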
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="alam-anastasopoulos-2020-fine">
    <titleInfo>
      <title>Fine-Tuning MT systems for Robustness to Second-Language Speaker Variations</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Md</namePart>
      <namePart type="given">Mahfuz</namePart>
      <namePart type="given">Ibn</namePart>
      <namePart type="family">Alam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Antonios</namePart>
      <namePart type="family">Anastasopoulos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Wei</namePart>
        <namePart type="family">Xu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alan</namePart>
        <namePart type="family">Ritter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tim</namePart>
        <namePart type="family">Baldwin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Afshin</namePart>
        <namePart type="family">Rahimi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The performance of neural machine translation (NMT) systems trained only on a single language variant degrades when confronted with even slightly different language variations. In this work, we build upon previous work to explore how to mitigate this issue. We show that fine-tuning using naturally occurring noise along with pseudo-references (i.e., “corrected” non-native inputs translated using the baseline NMT system) is a promising solution towards systems robust to such types of input variation. We focus on four translation pairs, from English to Spanish, Italian, French, and Portuguese, with our system achieving improvements of up to 3.1 BLEU points compared to the baselines, establishing a new state of the art on the JFLEG-ES dataset. All datasets and code are publicly available here: https://github.com/mahfuzibnalam/finetuning_for_robustness.</abstract>
<identifier type="citekey">alam-anastasopoulos-2020-fine</identifier>
<identifier type="doi">10.18653/v1/2020.wnut-1.20</identifier>
<location>
<url>https://aclanthology.org/2020.wnut-1.20</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>149</start>
<end>158</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fine-Tuning MT systems for Robustness to Second-Language Speaker Variations
%A Alam, Md Mahfuz Ibn
%A Anastasopoulos, Antonios
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%Y Rahimi, Afshin
%S Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F alam-anastasopoulos-2020-fine
%X The performance of neural machine translation (NMT) systems trained only on a single language variant degrades when confronted with even slightly different language variations. In this work, we build upon previous work to explore how to mitigate this issue. We show that fine-tuning using naturally occurring noise along with pseudo-references (i.e., “corrected” non-native inputs translated using the baseline NMT system) is a promising solution towards systems robust to such types of input variation. We focus on four translation pairs, from English to Spanish, Italian, French, and Portuguese, with our system achieving improvements of up to 3.1 BLEU points compared to the baselines, establishing a new state of the art on the JFLEG-ES dataset. All datasets and code are publicly available here: https://github.com/mahfuzibnalam/finetuning_for_robustness.
%R 10.18653/v1/2020.wnut-1.20
%U https://aclanthology.org/2020.wnut-1.20
%U https://doi.org/10.18653/v1/2020.wnut-1.20
%P 149-158
Markdown (Informal)
[Fine-Tuning MT systems for Robustness to Second-Language Speaker Variations](https://aclanthology.org/2020.wnut-1.20) (Alam & Anastasopoulos, WNUT 2020)
ACL
Md Mahfuz Ibn Alam and Antonios Anastasopoulos. 2020. Fine-Tuning MT systems for Robustness to Second-Language Speaker Variations. In Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020), pages 149–158, Online. Association for Computational Linguistics.