% Conference paper from the ACL 2018 main proceedings (Volume 1: Long Papers).
@inproceedings{kreutzer-etal-2018-reliability,
  author    = {Kreutzer, Julia and Uyheng, Joshua and Riezler, Stefan},
  title     = {Reliability and Learnability of Human Bandit Feedback for Sequence-to-Sequence Reinforcement Learning},
  editor    = {Gurevych, Iryna and Miyao, Yusuke},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  publisher = {Association for Computational Linguistics},
  address   = {Melbourne, Australia},
  month     = jul,
  year      = {2018},
  pages     = {1777--1788},
  doi       = {10.18653/v1/P18-1165},
  url       = {https://aclanthology.org/P18-1165/},
}