% Workshop paper by Sharaf and Daum\'e III (2017); ACL Anthology ID W17-4304.
% Notes for maintainers:
%   - The accent is written as {\'e} so BibTeX treats it as a single letter
%     when sorting and building labels.
%   - "Daum{\'e} III" is kept together as the surname, as in the original entry.
%   - `address` holds the conference venue (ACL convention), not the
%     publisher's city.
%   - `abstract` is ignored by standard styles; it is kept for reference only.
@InProceedings{sharaf-daumeiii:2017:StructPred,
  author    = {Sharaf, Amr and Daum{\'e} III, Hal},
  title     = {Structured Prediction via Learning to Search under Bandit Feedback},
  booktitle = {Proceedings of the 2nd Workshop on Structured Prediction for Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {17--26},
  abstract  = {We present an algorithm for structured prediction under online bandit feedback.
    The learner repeatedly predicts a sequence of actions, generating a structured
    output. It then observes feedback for that output and no others. We consider
    two cases: a pure bandit setting in which it only observes a loss, and more
    fine-grained feedback in which it observes a loss for every action. We find
    that the fine-grained feedback is necessary for strong empirical performance,
    because it allows for a robust variance-reduction strategy. We empirically
    compare a number of different algorithms and exploration methods and show the
    efficacy of BLS on sequence labeling and dependency parsing tasks.},
  doi       = {10.18653/v1/W17-4304},
  url       = {http://www.aclweb.org/anthology/W17-4304}
}

