@InProceedings{zeyer-alkhouli-ney:2018:Demos,
  author    = {Zeyer, Albert  and  Alkhouli, Tamer  and  Ney, Hermann},
  title     = {RETURNN as a Generic Flexible Neural Toolkit with Application to Translation and Speech Recognition},
  booktitle = {Proceedings of ACL 2018, System Demonstrations},
  month     = {July},
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  pages     = {128--133},
  abstract  = {We demonstrate the fast training and decoding speed of RETURNN for attention models in translation, due to fast CUDA LSTM kernels and a fast pure TensorFlow beam search decoder. We show that a layer-wise pretraining scheme for recurrent attention models gives an absolute BLEU improvement of over 1% and allows training deeper recurrent encoder networks. Promising preliminary results on maximum expected BLEU training are presented. We are able to train state-of-the-art models for translation and end-to-end models for speech recognition, and show results on WMT 2017 and Switchboard. The flexibility of RETURNN allows a fast research feedback loop for experimenting with alternative architectures, and its generality allows it to be used on a wide range of applications.},
  url       = {http://www.aclweb.org/anthology/P18-4022}
}

