@inproceedings{sharaf-daume-iii-2017-structured,
title = "Structured Prediction via Learning to Search under Bandit Feedback",
author = "Sharaf, Amr and
Daum{\'e} III, Hal",
editor = "Chang, Kai-Wei and
Chang, Ming-Wei and
Srikumar, Vivek and
Rush, Alexander M.",
booktitle = "Proceedings of the 2nd Workshop on Structured Prediction for Natural Language Processing",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-4304",
doi = "10.18653/v1/W17-4304",
pages = "17--26",
abstract = "We present an algorithm for structured prediction under online bandit feedback. The learner repeatedly predicts a sequence of actions, generating a structured output. It then observes feedback for that output and no others. We consider two cases: a pure bandit setting in which it only observes a loss, and more fine-grained feedback in which it observes a loss for every action. We find that the fine-grained feedback is necessary for strong empirical performance, because it allows for a robust variance-reduction strategy. We empirically compare a number of different algorithms and exploration methods and show the efficacy of BLS on sequence labeling and dependency parsing tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sharaf-daume-iii-2017-structured">
<titleInfo>
<title>Structured Prediction via Learning to Search under Bandit Feedback</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amr</namePart>
<namePart type="family">Sharaf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hal</namePart>
<namePart type="family">Daumé III</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Structured Prediction for Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kai-Wei</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ming-Wei</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Rush</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present an algorithm for structured prediction under online bandit feedback. The learner repeatedly predicts a sequence of actions, generating a structured output. It then observes feedback for that output and no others. We consider two cases: a pure bandit setting in which it only observes a loss, and more fine-grained feedback in which it observes a loss for every action. We find that the fine-grained feedback is necessary for strong empirical performance, because it allows for a robust variance-reduction strategy. We empirically compare a number of different algorithms and exploration methods and show the efficacy of BLS on sequence labeling and dependency parsing tasks.</abstract>
<identifier type="citekey">sharaf-daume-iii-2017-structured</identifier>
<identifier type="doi">10.18653/v1/W17-4304</identifier>
<location>
<url>https://aclanthology.org/W17-4304</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>17</start>
<end>26</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Structured Prediction via Learning to Search under Bandit Feedback
%A Sharaf, Amr
%A Daumé III, Hal
%Y Chang, Kai-Wei
%Y Chang, Ming-Wei
%Y Srikumar, Vivek
%Y Rush, Alexander M.
%S Proceedings of the 2nd Workshop on Structured Prediction for Natural Language Processing
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F sharaf-daume-iii-2017-structured
%X We present an algorithm for structured prediction under online bandit feedback. The learner repeatedly predicts a sequence of actions, generating a structured output. It then observes feedback for that output and no others. We consider two cases: a pure bandit setting in which it only observes a loss, and more fine-grained feedback in which it observes a loss for every action. We find that the fine-grained feedback is necessary for strong empirical performance, because it allows for a robust variance-reduction strategy. We empirically compare a number of different algorithms and exploration methods and show the efficacy of BLS on sequence labeling and dependency parsing tasks.
%R 10.18653/v1/W17-4304
%U https://aclanthology.org/W17-4304
%U https://doi.org/10.18653/v1/W17-4304
%P 17-26
Markdown (Informal)
[Structured Prediction via Learning to Search under Bandit Feedback](https://aclanthology.org/W17-4304) (Sharaf & Daumé III, 2017)
ACL