@InProceedings{gulcehre-EtAl:2017:RepL4NLP,
  author    = {Gulcehre, Caglar  and  Dutil, Francis  and  Trischler, Adam  and  Bengio, Yoshua},
  title     = {Plan, Attend, Generate: Character-Level Neural Machine Translation with Planning},
  booktitle = {Proceedings of the 2nd Workshop on Representation Learning for NLP},
  month     = {August},
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {228--234},
  abstract  = {We investigate the integration of a planning mechanism into an encoder-decoder
	architecture with attention. We develop a model that can plan ahead when it
	computes alignments between the source and target sequences, not only for a
	single time-step but for the next k time-steps as well, by constructing a
	matrix of proposed future alignments and a commitment vector that governs
	whether to follow or recompute the plan. This mechanism is inspired by the
	strategic attentive reader and writer (STRAW) model, a recent neural
	architecture for planning with hierarchical reinforcement learning that can
	also learn higher-level temporal abstractions. Our proposed model is
	end-to-end trainable with differentiable operations. We show that our model
	outperforms strong baselines on a character-level translation task from
	WMT'15 with fewer parameters and computes alignments that are qualitatively
	intuitive.},
  url       = {http://www.aclweb.org/anthology/W17-2627}
}

