@inproceedings{gete-etchegoyhen-2024-promoting,
title = "Promoting Target Data in Context-aware Neural Machine Translation",
author = "Gete, Harritxu and
Etchegoyhen, Thierry",
editor = "Scarton, Carolina and
Prescott, Charlotte and
Bayliss, Chris and
Oakley, Chris and
Wright, Joanna and
Wrigley, Stuart and
Song, Xingyi and
Gow-Smith, Edward and
Bawden, Rachel and
S{\'a}nchez-Cartagena, V{\'\i}ctor M and
Cadwell, Patrick and
Lapshinova-Koltunski, Ekaterina and
Cabarr{\~a}o, Vera and
Chatzitheodorou, Konstantinos and
Nurminen, Mary and
Kanojia, Diptesh and
Moniz, Helena",
booktitle = "Proceedings of the 25th Annual Conference of the European Association for Machine Translation (Volume 1)",
month = jun,
year = "2024",
address = "Sheffield, UK",
publisher = "European Association for Machine Translation (EAMT)",
url = "https://aclanthology.org/2024.eamt-1.6",
pages = "9--23",
abstract = "Standard context-aware neural machine translation (NMT) typically relies on parallel document-level data, exploiting both source and target contexts. Concatenation-based approaches in particular, still a strong baseline for document-level NMT, prepend source and/or target context sentences to the sentences to be translated, with model variants that exploit equal amounts of source and target data on each side achieving state-of-the-art results. In this work, we investigate whether target data should be further promoted within standard concatenation-based approaches, as most document-level phenomena rely on information that is present on the target language side. We evaluate novel concatenation-based variants where the target context is prepended to the source language, either in isolation or in combination with the source context. Experimental results in English-Russian and Basque-Spanish show that including target context in the source leads to large improvements on target language phenomena. On source-dependent phenomena, using only target language context in the source achieves parity with state-of-the-art concatenation approaches, or slightly underperforms, whereas combining source and target context on the source side leads to significant gains across the board.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gete-etchegoyhen-2024-promoting">
<titleInfo>
<title>Promoting Target Data in Context-aware Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Harritxu</namePart>
<namePart type="family">Gete</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Etchegoyhen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 25th Annual Conference of the European Association for Machine Translation (Volume 1)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Carolina</namePart>
<namePart type="family">Scarton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charlotte</namePart>
<namePart type="family">Prescott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Bayliss</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Oakley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joanna</namePart>
<namePart type="family">Wright</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stuart</namePart>
<namePart type="family">Wrigley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xingyi</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edward</namePart>
<namePart type="family">Gow-Smith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachel</namePart>
<namePart type="family">Bawden</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Víctor</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Sánchez-Cartagena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrick</namePart>
<namePart type="family">Cadwell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Lapshinova-Koltunski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Cabarrão</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Konstantinos</namePart>
<namePart type="family">Chatzitheodorou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mary</namePart>
<namePart type="family">Nurminen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diptesh</namePart>
<namePart type="family">Kanojia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Moniz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Association for Machine Translation (EAMT)</publisher>
<place>
<placeTerm type="text">Sheffield, UK</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Standard context-aware neural machine translation (NMT) typically relies on parallel document-level data, exploiting both source and target contexts. Concatenation-based approaches in particular, still a strong baseline for document-level NMT, prepend source and/or target context sentences to the sentences to be translated, with model variants that exploit equal amounts of source and target data on each side achieving state-of-the-art results. In this work, we investigate whether target data should be further promoted within standard concatenation-based approaches, as most document-level phenomena rely on information that is present on the target language side. We evaluate novel concatenation-based variants where the target context is prepended to the source language, either in isolation or in combination with the source context. Experimental results in English-Russian and Basque-Spanish show that including target context in the source leads to large improvements on target language phenomena. On source-dependent phenomena, using only target language context in the source achieves parity with state-of-the-art concatenation approaches, or slightly underperforms, whereas combining source and target context on the source side leads to significant gains across the board.</abstract>
<identifier type="citekey">gete-etchegoyhen-2024-promoting</identifier>
<location>
<url>https://aclanthology.org/2024.eamt-1.6</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>9</start>
<end>23</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Promoting Target Data in Context-aware Neural Machine Translation
%A Gete, Harritxu
%A Etchegoyhen, Thierry
%Y Scarton, Carolina
%Y Prescott, Charlotte
%Y Bayliss, Chris
%Y Oakley, Chris
%Y Wright, Joanna
%Y Wrigley, Stuart
%Y Song, Xingyi
%Y Gow-Smith, Edward
%Y Bawden, Rachel
%Y Sánchez-Cartagena, Víctor M.
%Y Cadwell, Patrick
%Y Lapshinova-Koltunski, Ekaterina
%Y Cabarrão, Vera
%Y Chatzitheodorou, Konstantinos
%Y Nurminen, Mary
%Y Kanojia, Diptesh
%Y Moniz, Helena
%S Proceedings of the 25th Annual Conference of the European Association for Machine Translation (Volume 1)
%D 2024
%8 June
%I European Association for Machine Translation (EAMT)
%C Sheffield, UK
%F gete-etchegoyhen-2024-promoting
%X Standard context-aware neural machine translation (NMT) typically relies on parallel document-level data, exploiting both source and target contexts. Concatenation-based approaches in particular, still a strong baseline for document-level NMT, prepend source and/or target context sentences to the sentences to be translated, with model variants that exploit equal amounts of source and target data on each side achieving state-of-the-art results. In this work, we investigate whether target data should be further promoted within standard concatenation-based approaches, as most document-level phenomena rely on information that is present on the target language side. We evaluate novel concatenation-based variants where the target context is prepended to the source language, either in isolation or in combination with the source context. Experimental results in English-Russian and Basque-Spanish show that including target context in the source leads to large improvements on target language phenomena. On source-dependent phenomena, using only target language context in the source achieves parity with state-of-the-art concatenation approaches, or slightly underperforms, whereas combining source and target context on the source side leads to significant gains across the board.
%U https://aclanthology.org/2024.eamt-1.6
%P 9-23
Markdown (Informal)
[Promoting Target Data in Context-aware Neural Machine Translation](https://aclanthology.org/2024.eamt-1.6) (Gete & Etchegoyhen, EAMT 2024)
ACL