@inproceedings{zeyer-etal-2018-returnn,
    title = "{RETURNN} as a Generic Flexible Neural Toolkit with Application to Translation and Speech Recognition",
    author = "Zeyer, Albert and
      Alkhouli, Tamer and
      Ney, Hermann",
    editor = "Liu, Fei and
      Solorio, Thamar",
    booktitle = "Proceedings of {ACL} 2018, System Demonstrations",
    month = jul,
    year = "2018",
    address = "Melbourne, Australia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P18-4022",
    doi = "10.18653/v1/P18-4022",
    pages = "128--133",
    abstract = "We compare the fast training and decoding speed of RETURNN of attention models for translation, due to fast CUDA LSTM kernels, and a fast pure TensorFlow beam search decoder. We show that a layer-wise pretraining scheme for recurrent attention models gives over 1{\%} BLEU improvement absolute and it allows to train deeper recurrent encoder networks. Promising preliminary results on max. expected BLEU training are presented. We are able to train state-of-the-art models for translation and end-to-end models for speech recognition and show results on WMT 2017 and Switchboard. The flexibility of RETURNN allows a fast research feedback loop to experiment with alternative architectures, and its generality allows to use it on a wide range of applications.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zeyer-etal-2018-returnn">
    <titleInfo>
      <title>RETURNN as a Generic Flexible Neural Toolkit with Application to Translation and Speech Recognition</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Albert</namePart>
      <namePart type="family">Zeyer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tamer</namePart>
      <namePart type="family">Alkhouli</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hermann</namePart>
      <namePart type="family">Ney</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2018-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of ACL 2018, System Demonstrations</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Fei</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Thamar</namePart>
        <namePart type="family">Solorio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Melbourne, Australia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We compare the fast training and decoding speed of RETURNN of attention models for translation, due to fast CUDA LSTM kernels, and a fast pure TensorFlow beam search decoder. We show that a layer-wise pretraining scheme for recurrent attention models gives over 1% BLEU improvement absolute and it allows to train deeper recurrent encoder networks. Promising preliminary results on max. expected BLEU training are presented. We are able to train state-of-the-art models for translation and end-to-end models for speech recognition and show results on WMT 2017 and Switchboard. The flexibility of RETURNN allows a fast research feedback loop to experiment with alternative architectures, and its generality allows to use it on a wide range of applications.</abstract>
    <identifier type="citekey">zeyer-etal-2018-returnn</identifier>
    <identifier type="doi">10.18653/v1/P18-4022</identifier>
    <location>
      <url>https://aclanthology.org/P18-4022</url>
    </location>
    <part>
      <date>2018-07</date>
      <extent unit="page">
        <start>128</start>
        <end>133</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T RETURNN as a Generic Flexible Neural Toolkit with Application to Translation and Speech Recognition
%A Zeyer, Albert
%A Alkhouli, Tamer
%A Ney, Hermann
%Y Liu, Fei
%Y Solorio, Thamar
%S Proceedings of ACL 2018, System Demonstrations
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F zeyer-etal-2018-returnn
%X We compare the fast training and decoding speed of RETURNN of attention models for translation, due to fast CUDA LSTM kernels, and a fast pure TensorFlow beam search decoder. We show that a layer-wise pretraining scheme for recurrent attention models gives over 1% BLEU improvement absolute and it allows to train deeper recurrent encoder networks. Promising preliminary results on max. expected BLEU training are presented. We are able to train state-of-the-art models for translation and end-to-end models for speech recognition and show results on WMT 2017 and Switchboard. The flexibility of RETURNN allows a fast research feedback loop to experiment with alternative architectures, and its generality allows to use it on a wide range of applications.
%R 10.18653/v1/P18-4022
%U https://aclanthology.org/P18-4022
%U https://doi.org/10.18653/v1/P18-4022
%P 128-133
Markdown (Informal)
[RETURNN as a Generic Flexible Neural Toolkit with Application to Translation and Speech Recognition](https://aclanthology.org/P18-4022) (Zeyer et al., ACL 2018)
ACL
Albert Zeyer, Tamer Alkhouli, and Hermann Ney. 2018. RETURNN as a Generic Flexible Neural Toolkit with Application to Translation and Speech Recognition. In Proceedings of ACL 2018, System Demonstrations, pages 128–133, Melbourne, Australia. Association for Computational Linguistics.