@inproceedings{weng-EtAl:2017:EMNLP2017,
  author    = {Weng, Rongxiang and Huang, Shujian and Zheng, Zaixiang and Dai, Xin-Yu and Chen, Jiajun},
  title     = {Neural Machine Translation with Word Predictions},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {136--145},
  abstract  = {In the encoder-decoder architecture for neural machine translation (NMT), the hidden states of the recurrent structures in the encoder and decoder carry the crucial information about the sentence. These vectors are generated by parameters which are updated by back-propagation of translation errors through time. We argue that propagating errors through the end-to-end recurrent structures are not a direct way of control the hidden vectors. In this paper, we propose to use word predictions as a mechanism for direct supervision. More specifically, we require these vectors to be able to predict the vocabulary in target sentence. Our simple mechanism ensures better representations in the encoder and decoder without using any extra data or annotation. It is also helpful in reducing the target side vocabulary and improving the decoding efficiency. Experiments on Chinese-English machine translation task show an average BLEU improvement by 4.53, respectively.},
  url       = {https://www.aclweb.org/anthology/D17-1013},
}

