@inproceedings{dai-xie-hovy:2018:Long,
  author    = {Dai, Zihang  and  Xie, Qizhe  and  Hovy, Eduard},
  title     = {From Credit Assignment to Entropy Regularization: Two New Algorithms for Neural Sequence Prediction},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  pages     = {1672--1682},
  abstract  = {In this work, we study the credit assignment problem in reward augmented maximum likelihood (RAML) learning, and establish a theoretical equivalence between the token-level counterpart of RAML and the entropy regularized reinforcement learning. Inspired by the connection, we propose two sequence prediction algorithms, one extending RAML with fine-grained credit assignment and the other improving Actor-Critic with a systematic entropy regularization. On two benchmark datasets, we show the proposed algorithms outperform RAML and Actor-Critic respectively, providing new alternatives to sequence prediction.},
  doi       = {10.18653/v1/P18-1155},
  url       = {http://www.aclweb.org/anthology/P18-1155}
}

