% ACL 2017 long paper proposing the Linear Associative Unit (LAU) for deep NMT.
% `month` uses the predefined three-letter macro (unbraced) so that the
% bibliography style decides how it is abbreviated or localised.
% `abstract` is not printed by standard styles; it is kept for reference with
% the authors' wording, with only a missing full stop and spacing repaired.
@inproceedings{wang-EtAl:2017:Long1,
  author    = {Wang, Mingxuan and Lu, Zhengdong and Zhou, Jie and Liu, Qun},
  title     = {Deep Neural Machine Translation with Linear Associative Unit},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {136--145},
  abstract  = {Deep Neural Networks (DNNs) have provably enhanced the
               state-of-the-art Neural Machine Translation (NMT) with its
               capability in modeling complex functions and capturing complex
               linguistic structures. However NMT with deep architecture in
               its encoder or decoder RNNs often suffer from severe gradient
               diffusion due to the non-linear recurrent activations, which
               often makes the optimization much more difficult. To address
               this problem we propose a novel linear associative units (LAU)
               to reduce the gradient propagation path inside the recurrent
               unit. Different from conventional approaches (LSTM unit and
               GRU), LAUs uses linear associative connections between input
               and output of the recurrent unit, which allows unimpeded
               information flow through both space and time. The model is
               quite simple, but it is surprisingly effective. Our empirical
               study on Chinese-English translation shows that our model with
               proper configuration can improve by 11.7 BLEU upon Groundhog
               and the best reported on results in the same setting. On WMT14
               English-German task and a larger WMT14 English-French task,
               our model achieves comparable results with the
               state-of-the-art.},
  url       = {http://aclweb.org/anthology/P17-1013}
}

