@inproceedings{junczysdowmunt-EtAl:2018:WNMT2018,
  author    = {Junczys-Dowmunt, Marcin and Heafield, Kenneth and Hoang, Hieu and Grundkiewicz, Roman and Aue, Anthony},
  title     = {{Marian}: Cost-effective High-Quality Neural Machine Translation in {C++}},
  booktitle = {Proceedings of the 2nd Workshop on Neural Machine Translation and Generation},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  pages     = {129--135},
  abstract  = {This paper describes the submissions of the ``Marian'' team to the WNMT 2018 shared task. We investigate combinations of teacher-student training, low-precision matrix products, auto-tuning and other methods to optimize the Transformer model on GPU and CPU. By further integrating these methods with the new averaging attention networks, a recently introduced faster Transformer variant, we create a number of high-quality, high-performance models on the GPU and CPU, dominating the Pareto frontier for this shared task.},
  doi       = {10.18653/v1/W18-2716},
  url       = {http://www.aclweb.org/anthology/W18-2716},
}

