% ACL 2018 long paper (Kreutzer, Uyheng, Riezler): human bandit feedback for seq2seq RL.
@inproceedings{kreutzer-etal-2018-reliability,
  author    = {Kreutzer, Julia and
               Uyheng, Joshua and
               Riezler, Stefan},
  title     = {Reliability and Learnability of Human Bandit Feedback for Sequence-to-Sequence Reinforcement Learning},
  editor    = {Gurevych, Iryna and
               Miyao, Yusuke},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  publisher = {Association for Computational Linguistics},
  address   = {Melbourne, Australia},
  month     = jul,
  year      = {2018},
  pages     = {1777--1788},
  doi       = {10.18653/v1/P18-1165},
  url       = {https://aclanthology.org/P18-1165/},
}