BibTeX
@inproceedings{zhong-etal-2024-context,
title = "Context Consistency between Training and Inference in Simultaneous Machine Translation",
author = "Zhong, Meizhi and
Liu, Lemao and
Chen, Kehai and
Yang, Mingming and
Zhang, Min",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.luhme-long.727/",
doi = "10.18653/v1/2024.acl-long.727",
pages = "13465--13476",
abstract = "Simultaneous Machine Translation (SiMT) aims to yield a real-time partial translation with a monotonically growing source-side context.However, there is a counterintuitive phenomenon about the context usage between training and inference: *e.g.*, in wait-$k$ inference, model consistently trained with wait-$k$ is much worse than that model inconsistently trained with wait-$k'$ ($k'\neq k$) in terms of translation quality. To this end, we first investigate the underlying reasons behind this phenomenon and uncover the following two factors: 1) the limited correlation between translation quality and training loss; 2) exposure bias between training and inference. Based on both reasons, we then propose an effective training approach called context consistency training accordingly, which encourages consistent context usage between training and inference by optimizing translation quality and latency as bi-objectives and exposing the predictions to the model during the training. The experiments on three language pairs demonstrate that our SiMT system encouraging context consistency outperforms existing SiMT systems with context inconsistency for the first time."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhong-etal-2024-context">
<titleInfo>
<title>Context Consistency between Training and Inference in Simultaneous Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Meizhi</namePart>
<namePart type="family">Zhong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lemao</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kehai</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mingming</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Simultaneous Machine Translation (SiMT) aims to yield a real-time partial translation with a monotonically growing source-side context. However, there is a counterintuitive phenomenon in context usage between training and inference: e.g., in wait-k inference, a model consistently trained with wait-k is much worse in translation quality than a model inconsistently trained with wait-k' (k' ≠ k). We first investigate the underlying reasons behind this phenomenon and uncover two factors: 1) the limited correlation between translation quality and training loss; and 2) exposure bias between training and inference. Based on both reasons, we then propose an effective training approach, context consistency training, which encourages consistent context usage between training and inference by optimizing translation quality and latency as bi-objectives and by exposing the model to its own predictions during training. Experiments on three language pairs demonstrate that, for the first time, our context-consistent SiMT system outperforms existing context-inconsistent SiMT systems.</abstract>
<identifier type="citekey">zhong-etal-2024-context</identifier>
<identifier type="doi">10.18653/v1/2024.acl-long.727</identifier>
<location>
<url>https://aclanthology.org/2024.luhme-long.727/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>13465</start>
<end>13476</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Context Consistency between Training and Inference in Simultaneous Machine Translation
%A Zhong, Meizhi
%A Liu, Lemao
%A Chen, Kehai
%A Yang, Mingming
%A Zhang, Min
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F zhong-etal-2024-context
%X Simultaneous Machine Translation (SiMT) aims to yield a real-time partial translation with a monotonically growing source-side context. However, there is a counterintuitive phenomenon in context usage between training and inference: e.g., in wait-k inference, a model consistently trained with wait-k is much worse in translation quality than a model inconsistently trained with wait-k' (k' ≠ k). We first investigate the underlying reasons behind this phenomenon and uncover two factors: 1) the limited correlation between translation quality and training loss; and 2) exposure bias between training and inference. Based on both reasons, we then propose an effective training approach, context consistency training, which encourages consistent context usage between training and inference by optimizing translation quality and latency as bi-objectives and by exposing the model to its own predictions during training. Experiments on three language pairs demonstrate that, for the first time, our context-consistent SiMT system outperforms existing context-inconsistent SiMT systems.
%R 10.18653/v1/2024.acl-long.727
%U https://aclanthology.org/2024.luhme-long.727/
%U https://doi.org/10.18653/v1/2024.acl-long.727
%P 13465-13476
Markdown (Informal)
[Context Consistency between Training and Inference in Simultaneous Machine Translation](https://aclanthology.org/2024.luhme-long.727/) (Zhong et al., ACL 2024)

ACL
Meizhi Zhong, Lemao Liu, Kehai Chen, Mingming Yang, and Min Zhang. 2024. Context Consistency between Training and Inference in Simultaneous Machine Translation. In Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 13465–13476, Bangkok, Thailand. Association for Computational Linguistics.
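
For readers unfamiliar with the wait-k policy the abstract refers to, below is a minimal Python sketch of its read/write schedule. This illustrates generic wait-k inference only, not the paper's context consistency training; translate_step is a hypothetical stand-in for any model that predicts the next target token from a source prefix and the target history.

def wait_k_decode(source_stream, translate_step, k, max_len=200):
    """Decode with a wait-k policy: READ the first k source tokens,
    then alternate one WRITE per additional READ."""
    source, target = [], []
    for token in source_stream:
        source.append(token)                               # READ one source token
        if len(source) >= k and len(target) < max_len:
            target.append(translate_step(source, target))  # WRITE one target token
    # Source exhausted: keep writing with the full context until EOS or the cap.
    while len(target) < max_len and (not target or target[-1] != "</s>"):
        target.append(translate_step(source, target))
    return target

Under this schedule, a model trained with wait-k' (k' ≠ k) is exposed to different source prefixes than it sees at wait-k inference, which is the training/inference context inconsistency the paper investigates.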