@inproceedings{britz-EtAl:2017:EMNLP2017,
  author    = {Britz, Denny and Goldie, Anna and Luong, Minh-Thang and Le, Quoc},
  title     = {Massive Exploration of {Neural Machine Translation} Architectures},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {1442--1451},
  abstract  = {Neural Machine Translation (NMT) has shown remarkable progress over the past
    few years, with production systems now being deployed to end-users.
    As the field is moving rapidly, it has become unclear which elements of NMT
    architectures have a significant impact on translation quality.
    In this work, we present a large-scale analysis of the sensitivity of NMT
    architectures to common hyperparameters. We report empirical results and
    variance numbers for several hundred experimental runs, corresponding to over
    250,000 GPU hours on a WMT English to German translation task. Our experiments
    provide practical insights into the relative importance of factors such as
    embedding size, network depth, RNN cell type, residual connections, attention
    mechanism, and decoding heuristics. As part of this contribution, we also
    release an open-source NMT framework in TensorFlow to make it easy for others
    to reproduce our results and perform their own experiments.},
  doi       = {10.18653/v1/D17-1151},
  url       = {https://www.aclweb.org/anthology/D17-1151}
}

