@inproceedings{ultes-EtAl:2017:W17-55,
  author    = {Ultes, Stefan and Budzianowski, Pawe{\l} and Casanueva, I{\~n}igo and Mrk{\v s}i{\'c}, Nikola and Rojas Barahona, Lina M. and Su, Pei-Hao and Wen, Tsung-Hsien and Gasic, Milica and Young, Steve},
  title     = {Reward-Balancing for Statistical Spoken Dialogue Systems using Multi-objective Reinforcement Learning},
  booktitle = {Proceedings of the 18th Annual {SIGdial} Meeting on Discourse and Dialogue},
  month     = aug,
  year      = {2017},
  address   = {Saarbr{\"u}cken, Germany},
  publisher = {Association for Computational Linguistics},
  pages     = {65--70},
  abstract  = {Reinforcement learning is widely used for dialogue policy optimization where
               the reward function often consists of more than one component, e.g., the
               dialogue success and the dialogue length. In this work, we propose a structured
               method for finding a good balance between these components by searching for the
               optimal reward component weighting. To render this search feasible, we use
               multi-objective reinforcement learning to significantly reduce the number of
               training dialogues required. We apply our proposed method to find optimized
               component weights for six domains and compare them to a default baseline.},
  url       = {http://aclweb.org/anthology/W17-5509},
}

