% Chen et al., EACL 2017 (Volume 2, Short Papers): companion teaching for
% on-line dialogue policy learning. `month` uses the predefined macro so that
% bibliography styles can abbreviate or localise it.
@InProceedings{chen-EtAl:2017:EACLshort,
  author    = {Chen, Lu and Yang, Runzhe and Chang, Cheng and Ye, Zihao and Zhou, Xiang and Yu, Kai},
  title     = {On-line Dialogue Policy Learning with Companion Teaching},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {198--204},
  abstract  = {On-line dialogue policy learning is the key for building evolvable
    conversational agent in real world scenarios. Poor initial policy can easily
    lead to bad user experience and consequently fail to attract sufficient users
    for policy training. A novel framework, companion teaching, is proposed to
    include a human teacher in the dialogue policy training loop to address the
    cold start problem. Here, dialogue policy is trained using not only user's
    reward, but also teacher's example action as well as estimated immediate reward
    at turn level. Simulation experiments showed that, with small number of human
    teaching dialogues, the proposed approach can effectively improve user
    experience at the beginning and smoothly lead to good performance with more
    user interaction data.},
  url       = {http://www.aclweb.org/anthology/E17-2032}
}