@inproceedings{donato-etal-2021-diverse,
    title = "Diverse Pretrained Context Encodings Improve Document Translation",
    author = "Donato, Domenic  and
      Yu, Lei  and
      Dyer, Chris",
    editor = "Zong, Chengqing  and
      Xia, Fei  and
      Li, Wenjie  and
      Navigli, Roberto",
    booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
    month = aug,
    year = "2021",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.acl-long.104",
    doi = "10.18653/v1/2021.acl-long.104",
    pages = "1299--1311",
abstract = "We propose a new architecture for adapting a sentence-level sequence-to-sequence transformer by incorporating multiple pre-trained document context signals and assess the impact on translation performance of (1) different pretraining approaches for generating these signals, (2) the quantity of parallel data for which document context is available, and (3) conditioning on source, target, or source and target contexts. Experiments on the NIST Chinese-English, and IWSLT and WMT English-German tasks support four general conclusions: that using pre-trained context representations markedly improves sample efficiency, that adequate parallel data resources are crucial for learning to use document context, that jointly conditioning on multiple context representations outperforms any single representation, and that source context is more valuable for translation performance than target side context. Our best multi-context model consistently outperforms the best existing context-aware transformers.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="donato-etal-2021-diverse">
    <titleInfo>
      <title>Diverse Pretrained Context Encodings Improve Document Translation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Domenic</namePart>
      <namePart type="family">Donato</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lei</namePart>
      <namePart type="family">Yu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chris</namePart>
      <namePart type="family">Dyer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Chengqing</namePart>
        <namePart type="family">Zong</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Fei</namePart>
        <namePart type="family">Xia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wenjie</namePart>
        <namePart type="family">Li</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Roberto</namePart>
        <namePart type="family">Navigli</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We propose a new architecture for adapting a sentence-level sequence-to-sequence transformer by incorporating multiple pre-trained document context signals and assess the impact on translation performance of (1) different pretraining approaches for generating these signals, (2) the quantity of parallel data for which document context is available, and (3) conditioning on source, target, or source and target contexts. Experiments on the NIST Chinese-English and the IWSLT and WMT English-German tasks support four general conclusions: that using pre-trained context representations markedly improves sample efficiency, that adequate parallel data resources are crucial for learning to use document context, that jointly conditioning on multiple context representations outperforms any single representation, and that source context is more valuable for translation performance than target-side context. Our best multi-context model consistently outperforms the best existing context-aware transformers.</abstract>
    <identifier type="citekey">donato-etal-2021-diverse</identifier>
    <identifier type="doi">10.18653/v1/2021.acl-long.104</identifier>
    <location>
      <url>https://aclanthology.org/2021.acl-long.104</url>
    </location>
    <part>
      <date>2021-08</date>
      <extent unit="page">
        <start>1299</start>
        <end>1311</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Diverse Pretrained Context Encodings Improve Document Translation
%A Donato, Domenic
%A Yu, Lei
%A Dyer, Chris
%Y Zong, Chengqing
%Y Xia, Fei
%Y Li, Wenjie
%Y Navigli, Roberto
%S Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F donato-etal-2021-diverse
%X We propose a new architecture for adapting a sentence-level sequence-to-sequence transformer by incorporating multiple pre-trained document context signals and assess the impact on translation performance of (1) different pretraining approaches for generating these signals, (2) the quantity of parallel data for which document context is available, and (3) conditioning on source, target, or source and target contexts. Experiments on the NIST Chinese-English and the IWSLT and WMT English-German tasks support four general conclusions: that using pre-trained context representations markedly improves sample efficiency, that adequate parallel data resources are crucial for learning to use document context, that jointly conditioning on multiple context representations outperforms any single representation, and that source context is more valuable for translation performance than target-side context. Our best multi-context model consistently outperforms the best existing context-aware transformers.
%R 10.18653/v1/2021.acl-long.104
%U https://aclanthology.org/2021.acl-long.104
%U https://doi.org/10.18653/v1/2021.acl-long.104
%P 1299-1311
Markdown (Informal)
[Diverse Pretrained Context Encodings Improve Document Translation](https://aclanthology.org/2021.acl-long.104) (Donato et al., ACL-IJCNLP 2021)

ACL
Domenic Donato, Lei Yu, and Chris Dyer. 2021. Diverse Pretrained Context Encodings Improve Document Translation. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pages 1299–1311, Online. Association for Computational Linguistics.