@inproceedings{chollampatt-etal-2020-automatic,
title = "Can Automatic Post-Editing Improve {NMT}?",
author = "Chollampatt, Shamil and
Susanto, Raymond Hendy and
Tan, Liling and
Szymanska, Ewa",
editor = "Webber, Bonnie and
Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.emnlp-main.217",
doi = "10.18653/v1/2020.emnlp-main.217",
pages = "2736--2746",
abstract = "Automatic post-editing (APE) aims to improve machine translations, thereby reducing human post-editing effort. APE has had notable success when used with statistical machine translation (SMT) systems but has not been as successful over neural machine translation (NMT) systems. This has raised questions on the relevance of APE task in the current scenario. However, the training of APE models has been heavily reliant on large-scale artificial corpora combined with only limited human post-edited data. We hypothesize that APE models have been underperforming in improving NMT translations due to the lack of adequate supervision. To ascertain our hypothesis, we compile a larger corpus of human post-edits of English to German NMT. We empirically show that a state-of-art neural APE model trained on this corpus can significantly improve a strong in-domain NMT system, challenging the current understanding in the field. We further investigate the effects of varying training data sizes, using artificial training data, and domain specificity for the APE task. We release this new corpus under CC BY-NC-SA 4.0 license at \url{https://github.com/shamilcm/pedra}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chollampatt-etal-2020-automatic">
<titleInfo>
<title>Can Automatic Post-Editing Improve NMT?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shamil</namePart>
<namePart type="family">Chollampatt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raymond</namePart>
<namePart type="given">Hendy</namePart>
<namePart type="family">Susanto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liling</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ewa</namePart>
<namePart type="family">Szymanska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bonnie</namePart>
<namePart type="family">Webber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automatic post-editing (APE) aims to improve machine translations, thereby reducing human post-editing effort. APE has had notable success when used with statistical machine translation (SMT) systems but has not been as successful over neural machine translation (NMT) systems. This has raised questions on the relevance of APE task in the current scenario. However, the training of APE models has been heavily reliant on large-scale artificial corpora combined with only limited human post-edited data. We hypothesize that APE models have been underperforming in improving NMT translations due to the lack of adequate supervision. To ascertain our hypothesis, we compile a larger corpus of human post-edits of English to German NMT. We empirically show that a state-of-art neural APE model trained on this corpus can significantly improve a strong in-domain NMT system, challenging the current understanding in the field. We further investigate the effects of varying training data sizes, using artificial training data, and domain specificity for the APE task. We release this new corpus under CC BY-NC-SA 4.0 license at https://github.com/shamilcm/pedra.</abstract>
<identifier type="citekey">chollampatt-etal-2020-automatic</identifier>
<identifier type="doi">10.18653/v1/2020.emnlp-main.217</identifier>
<location>
<url>https://aclanthology.org/2020.emnlp-main.217</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>2736</start>
<end>2746</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can Automatic Post-Editing Improve NMT?
%A Chollampatt, Shamil
%A Susanto, Raymond Hendy
%A Tan, Liling
%A Szymanska, Ewa
%Y Webber, Bonnie
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F chollampatt-etal-2020-automatic
%X Automatic post-editing (APE) aims to improve machine translations, thereby reducing human post-editing effort. APE has had notable success when used with statistical machine translation (SMT) systems but has not been as successful over neural machine translation (NMT) systems. This has raised questions on the relevance of APE task in the current scenario. However, the training of APE models has been heavily reliant on large-scale artificial corpora combined with only limited human post-edited data. We hypothesize that APE models have been underperforming in improving NMT translations due to the lack of adequate supervision. To ascertain our hypothesis, we compile a larger corpus of human post-edits of English to German NMT. We empirically show that a state-of-art neural APE model trained on this corpus can significantly improve a strong in-domain NMT system, challenging the current understanding in the field. We further investigate the effects of varying training data sizes, using artificial training data, and domain specificity for the APE task. We release this new corpus under CC BY-NC-SA 4.0 license at https://github.com/shamilcm/pedra.
%R 10.18653/v1/2020.emnlp-main.217
%U https://aclanthology.org/2020.emnlp-main.217
%U https://doi.org/10.18653/v1/2020.emnlp-main.217
%P 2736-2746
Markdown (Informal)
[Can Automatic Post-Editing Improve NMT?](https://aclanthology.org/2020.emnlp-main.217) (Chollampatt et al., EMNLP 2020)
ACL
- Shamil Chollampatt, Raymond Hendy Susanto, Liling Tan, and Ewa Szymanska. 2020. Can Automatic Post-Editing Improve NMT?. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pages 2736–2746, Online. Association for Computational Linguistics.