% Conference paper by Domhan and Hieber in the EMNLP 2017 proceedings.
% `month` uses the predefined BibTeX macro (unquoted) so that the bibliography
% style decides how the month is abbreviated and localised.
% `address` holds the conference location, as exported with this record.
@inproceedings{domhan-hieber:2017:EMNLP2017,
  author    = {Domhan, Tobias and Hieber, Felix},
  title     = {Using Target-side Monolingual Data for Neural Machine Translation through Multi-task Learning},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {1500--1505},
  abstract  = {The performance of Neural Machine Translation (NMT) models relies heavily on
               the availability of sufficient amounts of parallel data, and an efficient and
               effective way of leveraging the vastly available amounts of monolingual data
               has yet to be found.
               We propose to modify the decoder in a neural sequence-to-sequence model to
               enable multi-task learning for two strongly related tasks: target-side language
               modeling and translation.
               The decoder predicts the next target word through two channels, a target-side
               language model on the lowest layer, and an attentional recurrent model which is
               conditioned on the source representation.
               This architecture allows joint training on both large amounts of monolingual
               and moderate amounts of bilingual data to improve NMT performance.
               Initial results in the news domain for three language pairs show moderate but
               consistent improvements over a baseline trained on bilingual data only.},
  url       = {https://www.aclweb.org/anthology/D17-1158},
}