@inproceedings{akoury-etal-2019-syntactically,
    title = "Syntactically Supervised Transformers for Faster Neural Machine Translation",
    author = "Akoury, Nader  and
      Krishna, Kalpesh  and
      Iyyer, Mohit",
    editor = "Korhonen, Anna  and
      Traum, David  and
      M{\`a}rquez, Llu{\'\i}s",
    booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
    month = jul,
    year = "2019",
    address = "Florence, Italy",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P19-1122",
    doi = "10.18653/v1/P19-1122",
    pages = "1269--1281",
    abstract = "Standard decoders for neural machine translation autoregressively generate a single target token per timestep, which slows inference especially for long outputs. While architectural advances such as the Transformer fully parallelize the decoder computations at training time, inference still proceeds sequentially. Recent developments in non- and semi-autoregressive decoding produce multiple tokens per timestep independently of the others, which improves inference speed but deteriorates translation quality. In this work, we propose the syntactically supervised Transformer (SynST), which first autoregressively predicts a chunked parse tree before generating all of the target tokens in one shot conditioned on the predicted parse. A series of controlled experiments demonstrates that SynST decodes sentences {\textasciitilde}5x faster than the baseline autoregressive Transformer while achieving higher BLEU scores than most competing methods on En-De and En-Fr datasets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="akoury-etal-2019-syntactically">
    <titleInfo>
      <title>Syntactically Supervised Transformers for Faster Neural Machine Translation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Nader</namePart>
      <namePart type="family">Akoury</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kalpesh</namePart>
      <namePart type="family">Krishna</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mohit</namePart>
      <namePart type="family">Iyyer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Korhonen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Traum</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lluís</namePart>
        <namePart type="family">Màrquez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Florence, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Standard decoders for neural machine translation autoregressively generate a single target token per timestep, which slows inference especially for long outputs. While architectural advances such as the Transformer fully parallelize the decoder computations at training time, inference still proceeds sequentially. Recent developments in non- and semi-autoregressive decoding produce multiple tokens per timestep independently of the others, which improves inference speed but deteriorates translation quality. In this work, we propose the syntactically supervised Transformer (SynST), which first autoregressively predicts a chunked parse tree before generating all of the target tokens in one shot conditioned on the predicted parse. A series of controlled experiments demonstrates that SynST decodes sentences ~5x faster than the baseline autoregressive Transformer while achieving higher BLEU scores than most competing methods on En-De and En-Fr datasets.</abstract>
    <identifier type="citekey">akoury-etal-2019-syntactically</identifier>
    <identifier type="doi">10.18653/v1/P19-1122</identifier>
    <location>
      <url>https://aclanthology.org/P19-1122</url>
    </location>
    <part>
      <date>2019-07</date>
      <extent unit="page">
        <start>1269</start>
        <end>1281</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Syntactically Supervised Transformers for Faster Neural Machine Translation
%A Akoury, Nader
%A Krishna, Kalpesh
%A Iyyer, Mohit
%Y Korhonen, Anna
%Y Traum, David
%Y Màrquez, Lluís
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F akoury-etal-2019-syntactically
%X Standard decoders for neural machine translation autoregressively generate a single target token per timestep, which slows inference especially for long outputs. While architectural advances such as the Transformer fully parallelize the decoder computations at training time, inference still proceeds sequentially. Recent developments in non- and semi-autoregressive decoding produce multiple tokens per timestep independently of the others, which improves inference speed but deteriorates translation quality. In this work, we propose the syntactically supervised Transformer (SynST), which first autoregressively predicts a chunked parse tree before generating all of the target tokens in one shot conditioned on the predicted parse. A series of controlled experiments demonstrates that SynST decodes sentences ~5x faster than the baseline autoregressive Transformer while achieving higher BLEU scores than most competing methods on En-De and En-Fr datasets.
%R 10.18653/v1/P19-1122
%U https://aclanthology.org/P19-1122
%U https://doi.org/10.18653/v1/P19-1122
%P 1269-1281
Markdown (Informal)
[Syntactically Supervised Transformers for Faster Neural Machine Translation](https://aclanthology.org/P19-1122) (Akoury et al., ACL 2019)
ACL
Nader Akoury, Kalpesh Krishna, and Mohit Iyyer. 2019. [Syntactically Supervised Transformers for Faster Neural Machine Translation](https://aclanthology.org/P19-1122). In *Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics*, pages 1269–1281, Florence, Italy. Association for Computational Linguistics.