@inproceedings{wang-EtAl:2018:C18-18,
  author    = {Wang, Qiang and Li, Fuxue and Xiao, Tong and Li, Yanyang and Li, Yinqiao and Zhu, Jingbo},
  title     = {Multi-layer Representation Fusion for Neural Machine Translation},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {3015--3026},
  abstract  = {Neural machine translation systems require a number of stacked layers for deep models. But the prediction depends on the sentence representation of the top-most layer with no access to low-level representations. This makes it more difficult to train the model and poses a risk of information loss to prediction. In this paper, we propose a multi-layer representation fusion (MLRF) approach to fusing stacked layers. In particular, we design three fusion functions to learn a better representation from the stack. Experimental results show that our approach yields improvements of 0.92 and 0.56 BLEU points over the strong Transformer baseline on IWSLT German-English and NIST Chinese-English MT tasks respectively. The result is new state-of-the-art in German-English translation.},
  url       = {https://www.aclweb.org/anthology/C18-1255},
}

