@InProceedings{gan-EtAl:2017:Long,
  author    = {Gan, Zhe and Li, Chunyuan and Chen, Changyou and Pu, Yunchen and Su, Qinliang and Carin, Lawrence},
  title     = {Scalable Bayesian Learning of Recurrent Neural Networks for Language Modeling},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = {July},
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {321--331},
  abstract  = {Recurrent neural networks (RNNs) have shown promising performance for language
               modeling. However, traditional training of RNNs using back-propagation through
               time often suffers from overfitting. One reason for this is that stochastic
               optimization (used for large training sets) does not provide good estimates of
               model uncertainty. This paper leverages recent advances in stochastic gradient
               Markov Chain Monte Carlo (also appropriate for large training sets) to learn
               weight uncertainty in RNNs. It yields a principled Bayesian learning algorithm,
               adding gradient noise during training (enhancing exploration of the
               model-parameter space) and model averaging when testing. Extensive experiments
               on various RNN models and across a broad range of applications demonstrate the
               superiority of the proposed approach relative to stochastic optimization.},
  url       = {http://aclweb.org/anthology/P17-1030}
}
