@inproceedings{gao-etal-2022-bi,
title = "{B}i-{S}im{C}ut: A Simple Strategy for Boosting Neural Machine Translation",
author = "Gao, Pengzhi and
He, Zhongjun and
Wu, Hua and
Wang, Haifeng",
editor = "Carpuat, Marine and
de Marneffe, Marie-Catherine and
Meza Ruiz, Ivan Vladimir",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-main.289/",
doi = "10.18653/v1/2022.naacl-main.289",
pages = "3938--3948",
abstract = "We introduce Bi-SimCut: a simple but effective training strategy to boost neural machine translation (NMT) performance. It consists of two procedures: bidirectional pretraining and unidirectional finetuning. Both procedures utilize SimCut, a simple regularization method that forces the consistency between the output distributions of the original and the cutoff sentence pairs. Without leveraging extra dataset via back-translation or integrating large-scale pretrained model, Bi-SimCut achieves strong translation performance across five translation benchmarks (data sizes range from 160K to 20.2M): BLEU scores of 31.16 for $\texttt{en}\rightarrow\texttt{de}$ and 38.37 for $\texttt{de}\rightarrow\texttt{en}$ on the IWSLT14 dataset, 30.78 for $\texttt{en}\rightarrow\texttt{de}$ and 35.15 for $\texttt{de}\rightarrow\texttt{en}$ on the WMT14 dataset, and 27.17 for $\texttt{zh}\rightarrow\texttt{en}$ on the WMT17 dataset. SimCut is not a new method, but a version of Cutoff (Shen et al., 2020) simplified and adapted for NMT, and it could be considered as a perturbation-based method. Given the universality and simplicity of Bi-SimCut and SimCut, we believe they can serve as strong baselines for future NMT research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gao-etal-2022-bi">
<titleInfo>
<title>Bi-SimCut: A Simple Strategy for Boosting Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pengzhi</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhongjun</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hua</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haifeng</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="given">Vladimir</namePart>
<namePart type="family">Meza Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce Bi-SimCut: a simple but effective training strategy to boost neural machine translation (NMT) performance. It consists of two procedures: bidirectional pretraining and unidirectional finetuning. Both procedures utilize SimCut, a simple regularization method that forces the consistency between the output distributions of the original and the cutoff sentence pairs. Without leveraging extra dataset via back-translation or integrating large-scale pretrained model, Bi-SimCut achieves strong translation performance across five translation benchmarks (data sizes range from 160K to 20.2M): BLEU scores of 31.16 for en→de and 38.37 for de→en on the IWSLT14 dataset, 30.78 for en→de and 35.15 for de→en on the WMT14 dataset, and 27.17 for zh→en on the WMT17 dataset. SimCut is not a new method, but a version of Cutoff (Shen et al., 2020) simplified and adapted for NMT, and it could be considered as a perturbation-based method. Given the universality and simplicity of Bi-SimCut and SimCut, we believe they can serve as strong baselines for future NMT research.</abstract>
<identifier type="citekey">gao-etal-2022-bi</identifier>
<identifier type="doi">10.18653/v1/2022.naacl-main.289</identifier>
<location>
<url>https://aclanthology.org/2022.naacl-main.289/</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>3938</start>
<end>3948</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bi-SimCut: A Simple Strategy for Boosting Neural Machine Translation
%A Gao, Pengzhi
%A He, Zhongjun
%A Wu, Hua
%A Wang, Haifeng
%Y Carpuat, Marine
%Y de Marneffe, Marie-Catherine
%Y Meza Ruiz, Ivan Vladimir
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F gao-etal-2022-bi
%X We introduce Bi-SimCut: a simple but effective training strategy to boost neural machine translation (NMT) performance. It consists of two procedures: bidirectional pretraining and unidirectional finetuning. Both procedures utilize SimCut, a simple regularization method that forces the consistency between the output distributions of the original and the cutoff sentence pairs. Without leveraging extra dataset via back-translation or integrating large-scale pretrained model, Bi-SimCut achieves strong translation performance across five translation benchmarks (data sizes range from 160K to 20.2M): BLEU scores of 31.16 for en→de and 38.37 for de→en on the IWSLT14 dataset, 30.78 for en→de and 35.15 for de→en on the WMT14 dataset, and 27.17 for zh→en on the WMT17 dataset. SimCut is not a new method, but a version of Cutoff (Shen et al., 2020) simplified and adapted for NMT, and it could be considered as a perturbation-based method. Given the universality and simplicity of Bi-SimCut and SimCut, we believe they can serve as strong baselines for future NMT research.
%R 10.18653/v1/2022.naacl-main.289
%U https://aclanthology.org/2022.naacl-main.289/
%U https://doi.org/10.18653/v1/2022.naacl-main.289
%P 3938-3948
Markdown (Informal)
[Bi-SimCut: A Simple Strategy for Boosting Neural Machine Translation](https://aclanthology.org/2022.naacl-main.289/) (Gao et al., NAACL 2022)
ACL
- Pengzhi Gao, Zhongjun He, Hua Wu, and Haifeng Wang. 2022. Bi-SimCut: A Simple Strategy for Boosting Neural Machine Translation. In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pages 3938–3948, Seattle, United States. Association for Computational Linguistics.
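For readers skimming the abstract above: the core SimCut idea (a consistency term between the output distributions of an original and a "cutoff", i.e. randomly perturbed, input) is easy to prototype. The following Python/PyTorch sketch is illustrative only and is not the authors' released code; the `model` interface, the embedding-level cutoff mask, the direction of the KL term, and the default hyperparameters `p_cut` and `alpha` are all assumptions made for the example.

```python
# Illustrative sketch of a SimCut-style consistency loss (Gao et al., 2022,
# simplifying Cutoff, Shen et al., 2020). NOT the authors' implementation:
# the model interface, cutoff scheme, KL direction, and hyperparameter
# defaults here are assumptions made for this example.
import torch
import torch.nn.functional as F

def simcut_loss(model, src_embed, tgt_embed, gold, pad_id,
                p_cut=0.05, alpha=3.0):
    """NLL on the original input plus a KL term that pulls the output
    distribution of the cutoff input toward that of the original input."""
    # Forward pass on the unperturbed source embeddings.
    logits = model(src_embed, tgt_embed)          # (batch, tgt_len, vocab)

    # "Cutoff" perturbation: randomly zero entries of the source embeddings.
    keep = (torch.rand_like(src_embed) > p_cut).float()
    logits_cut = model(src_embed * keep, tgt_embed)

    # Standard translation loss on the original view.
    nll = F.cross_entropy(
        logits.reshape(-1, logits.size(-1)), gold.reshape(-1),
        ignore_index=pad_id)

    # Consistency regularizer on the per-token output distributions; the
    # original view is detached so only the cutoff view is pulled toward it.
    kl = F.kl_div(
        F.log_softmax(logits_cut, dim=-1),
        F.softmax(logits, dim=-1).detach(),
        reduction="batchmean")

    return nll + alpha * kl
```

Per the abstract, this loss is used in both stages of Bi-SimCut: bidirectional pretraining (one model trained on both translation directions) followed by unidirectional finetuning; the sketch above is orthogonal to that data setup.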