@InProceedings{bradbury-socher:2017:StructPred,
  author    = {Bradbury, James and Socher, Richard},
  title     = {Towards Neural Machine Translation with Latent Tree Attention},
  booktitle = {Proceedings of the 2nd Workshop on Structured Prediction for Natural Language Processing},
  month     = {September},
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {12--16},
  abstract  = {Building models that take advantage of the hierarchical structure of language
               without a priori annotation is a longstanding goal in natural language
               processing. We introduce such a model for the task of machine translation,
               pairing a recurrent neural network grammar encoder with a novel attentional
               RNNG decoder and applying policy gradient reinforcement learning to induce
               unsupervised tree structures on both the source and target. When trained on
               character-level datasets with no explicit segmentation or parse annotation, the
               model learns a plausible segmentation and shallow parse, obtaining performance
               close to an attentional baseline.},
  url       = {http://www.aclweb.org/anthology/W17-4303}
}