% Conference paper from the IWSLT 2022 proceedings (ACL Anthology record).
% Non-ASCII characters are written as LaTeX escapes so the entry stays
% safe for classic 8-bit BibTeX as well as Biber.
@inproceedings{zhang-etal-2022-improving-machine,
    title     = {Improving Machine Translation Formality Control with Weakly-Labelled Data Augmentation and Post Editing Strategies},
    author    = {Zhang, Daniel and
                 Yu, Jiang and
                 Verma, Pragati and
                 Ganesan, Ashwinkumar and
                 Campbell, Sarah},
    editor    = {Salesky, Elizabeth and
                 Federico, Marcello and
                 Costa-juss{\`a}, Marta},
    booktitle = {Proceedings of the 19th International Conference on Spoken Language Translation ({IWSLT} 2022)},
    month     = may,
    year      = {2022},
    address   = {Dublin, Ireland (in-person and online)},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2022.iwslt-1.32},
    doi       = {10.18653/v1/2022.iwslt-1.32},
    pages     = {351--360},
    abstract  = {This paper describes Amazon Alexa AI{'}s implementation for the IWSLT 2022 shared task on formality control. We focus on the unconstrained and supervised task for en$\rightarrow$hi (Hindi) and en$\rightarrow$ja (Japanese) pairs where very limited formality annotated data is available. We propose three simple yet effective post editing strategies namely, T-V conversion, utilizing a verb conjugator and seq2seq models in order to rewrite the translated phrases into formal or informal language. Considering nuances for formality and informality in different languages, our analysis shows that a language-specific post editing strategy achieves the best performance. To address the unique challenge of limited formality annotations, we further develop a formality classifier to perform weakly labelled data augmentation which automatically generates synthetic formality labels from large parallel corpus. Empirical results on the IWSLT formality testset have shown that proposed system achieved significant improvements in terms of formality accuracy while retaining BLEU score on-par with baseline.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the conference paper with citekey
     zhang-etal-2022-improving-machine. The paper's authors are listed
     first; the host proceedings (title, editors, publisher, place) are
     described inside relatedItem type="host". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zhang-etal-2022-improving-machine">
    <titleInfo>
      <title>Improving Machine Translation Formality Control with Weakly-Labelled Data Augmentation and Post Editing Strategies</title>
    </titleInfo>
    <!-- Paper authors, in publication order -->
    <name type="personal">
      <namePart type="given">Daniel</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jiang</namePart>
      <namePart type="family">Yu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pragati</namePart>
      <namePart type="family">Verma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ashwinkumar</namePart>
      <namePart type="family">Ganesan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sarah</namePart>
      <namePart type="family">Campbell</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume and its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Elizabeth</namePart>
        <namePart type="family">Salesky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcello</namePart>
        <namePart type="family">Federico</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marta</namePart>
        <namePart type="family">Costa-jussà</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dublin, Ireland (in-person and online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper describes Amazon Alexa AI’s implementation for the IWSLT 2022 shared task on formality control. We focus on the unconstrained and supervised task for en→hi (Hindi) and en→ja (Japanese) pairs where very limited formality annotated data is available. We propose three simple yet effective post editing strategies namely, T-V conversion, utilizing a verb conjugator and seq2seq models in order to rewrite the translated phrases into formal or informal language. Considering nuances for formality and informality in different languages, our analysis shows that a language-specific post editing strategy achieves the best performance. To address the unique challenge of limited formality annotations, we further develop a formality classifier to perform weakly labelled data augmentation which automatically generates synthetic formality labels from large parallel corpus. Empirical results on the IWSLT formality testset have shown that proposed system achieved significant improvements in terms of formality accuracy while retaining BLEU score on-par with baseline.</abstract>
    <!-- Identifiers and access location -->
    <identifier type="citekey">zhang-etal-2022-improving-machine</identifier>
    <identifier type="doi">10.18653/v1/2022.iwslt-1.32</identifier>
    <location>
      <url>https://aclanthology.org/2022.iwslt-1.32</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2022-05</date>
      <extent unit="page">
        <start>351</start>
        <end>360</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Improving Machine Translation Formality Control with Weakly-Labelled Data Augmentation and Post Editing Strategies
%A Zhang, Daniel
%A Yu, Jiang
%A Verma, Pragati
%A Ganesan, Ashwinkumar
%A Campbell, Sarah
%Y Salesky, Elizabeth
%Y Federico, Marcello
%Y Costa-jussà, Marta
%S Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland (in-person and online)
%F zhang-etal-2022-improving-machine
%X This paper describes Amazon Alexa AI’s implementation for the IWSLT 2022 shared task on formality control. We focus on the unconstrained and supervised task for en→hi (Hindi) and en→ja (Japanese) pairs where very limited formality annotated data is available. We propose three simple yet effective post editing strategies namely, T-V conversion, utilizing a verb conjugator and seq2seq models in order to rewrite the translated phrases into formal or informal language. Considering nuances for formality and informality in different languages, our analysis shows that a language-specific post editing strategy achieves the best performance. To address the unique challenge of limited formality annotations, we further develop a formality classifier to perform weakly labelled data augmentation which automatically generates synthetic formality labels from large parallel corpus. Empirical results on the IWSLT formality testset have shown that proposed system achieved significant improvements in terms of formality accuracy while retaining BLEU score on-par with baseline.
%R 10.18653/v1/2022.iwslt-1.32
%U https://aclanthology.org/2022.iwslt-1.32
%U https://doi.org/10.18653/v1/2022.iwslt-1.32
%P 351-360
Markdown (Informal)
[Improving Machine Translation Formality Control with Weakly-Labelled Data Augmentation and Post Editing Strategies](https://aclanthology.org/2022.iwslt-1.32) (Zhang et al., IWSLT 2022)
ACL