@inproceedings{zhou-EtAl:2017:Short2,
  author    = {Zhou, Hao  and  Tu, Zhaopeng  and  Huang, Shujian  and  Liu, Xiaohua  and  Li, Hang  and  Chen, Jiajun},
  title     = {Chunk-Based Bi-Scale Decoder for Neural Machine Translation},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {580--586},
  abstract  = {In typical neural machine translation~(NMT), the decoder generates a sentence
	word by word, packing all linguistic granularities in the same time-scale of
	RNN. In this paper, we propose a new type of decoder for NMT, which splits the
	decode state into two parts and updates them in two different time-scales.
	Specifically, we first predict a chunk time-scale state for phrasal modeling,
	on top of which multiple word time-scale states are generated.
	In this way, the target sentence is translated hierarchically from chunks to
	words, with information in different granularities being leveraged.
	Experiments show that our proposed model significantly improves the translation
	performance over the state-of-the-art NMT model.},
  doi       = {10.18653/v1/P17-2092},
  url       = {https://aclanthology.org/P17-2092},
  internal-note = {review: doi added from the standard ACL 2017 DOI pattern; url migrated to the current aclanthology.org host -- confirm both resolve}
}

