@inproceedings{gordon-hall-etal-2020-learning,
title = "{L}earning {D}ialog {P}olicies from {W}eak {D}emonstrations",
author = "Gordon-Hall, Gabriel and
Gorinski, Philip John and
Cohen, Shay B.",
editor = "Jurafsky, Dan and
Chai, Joyce and
Schluter, Natalie and
Tetreault, Joel",
booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.acl-main.129",
doi = "10.18653/v1/2020.acl-main.129",
pages = "1394--1405",
abstract = "Deep reinforcement learning is a promising approach to training a dialog manager, but current methods struggle with the large state and action spaces of multi-domain dialog systems. Building upon Deep Q-learning from Demonstrations (DQfD), an algorithm that scores highly in difficult Atari games, we leverage dialog data to guide the agent to successfully respond to a user{'}s requests. We make progressively fewer assumptions about the data needed, using labeled, reduced-labeled, and even unlabeled data to train expert demonstrators. We introduce Reinforced Fine-tune Learning, an extension to DQfD, enabling us to overcome the domain gap between the datasets and the environment. Experiments in a challenging multi-domain dialog system framework validate our approaches, and get high success rates even when trained on out-of-domain data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="gordon-hall-etal-2020-learning">
    <titleInfo>
      <title>Learning Dialog Policies from Weak Demonstrations</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Gabriel</namePart>
      <namePart type="family">Gordon-Hall</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Philip</namePart>
      <namePart type="given">John</namePart>
      <namePart type="family">Gorinski</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shay</namePart>
      <namePart type="given">B</namePart>
      <namePart type="family">Cohen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Dan</namePart>
        <namePart type="family">Jurafsky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joyce</namePart>
        <namePart type="family">Chai</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Natalie</namePart>
        <namePart type="family">Schluter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joel</namePart>
        <namePart type="family">Tetreault</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Deep reinforcement learning is a promising approach to training a dialog manager, but current methods struggle with the large state and action spaces of multi-domain dialog systems. Building upon Deep Q-learning from Demonstrations (DQfD), an algorithm that scores highly in difficult Atari games, we leverage dialog data to guide the agent to successfully respond to a user’s requests. We make progressively fewer assumptions about the data needed, using labeled, reduced-labeled, and even unlabeled data to train expert demonstrators. We introduce Reinforced Fine-tune Learning, an extension to DQfD, enabling us to overcome the domain gap between the datasets and the environment. Experiments in a challenging multi-domain dialog system framework validate our approaches, achieving high success rates even when trained on out-of-domain data.</abstract>
<identifier type="citekey">gordon-hall-etal-2020-learning</identifier>
<identifier type="doi">10.18653/v1/2020.acl-main.129</identifier>
<location>
<url>https://aclanthology.org/2020.acl-main.129</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>1394</start>
<end>1405</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning Dialog Policies from Weak Demonstrations
%A Gordon-Hall, Gabriel
%A Gorinski, Philip John
%A Cohen, Shay B.
%Y Jurafsky, Dan
%Y Chai, Joyce
%Y Schluter, Natalie
%Y Tetreault, Joel
%S Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F gordon-hall-etal-2020-learning
%X Deep reinforcement learning is a promising approach to training a dialog manager, but current methods struggle with the large state and action spaces of multi-domain dialog systems. Building upon Deep Q-learning from Demonstrations (DQfD), an algorithm that scores highly in difficult Atari games, we leverage dialog data to guide the agent to successfully respond to a user’s requests. We make progressively fewer assumptions about the data needed, using labeled, reduced-labeled, and even unlabeled data to train expert demonstrators. We introduce Reinforced Fine-tune Learning, an extension to DQfD, enabling us to overcome the domain gap between the datasets and the environment. Experiments in a challenging multi-domain dialog system framework validate our approaches, achieving high success rates even when trained on out-of-domain data.
%R 10.18653/v1/2020.acl-main.129
%U https://aclanthology.org/2020.acl-main.129
%U https://doi.org/10.18653/v1/2020.acl-main.129
%P 1394-1405
Markdown (Informal)
[Learning Dialog Policies from Weak Demonstrations](https://aclanthology.org/2020.acl-main.129) (Gordon-Hall et al., ACL 2020)
ACL
Gabriel Gordon-Hall, Philip John Gorinski, and Shay B. Cohen. 2020. Learning Dialog Policies from Weak Demonstrations. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pages 1394–1405, Online. Association for Computational Linguistics.
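
The abstract builds on Deep Q-learning from Demonstrations (DQfD; Hester et al., 2018). For readers unfamiliar with that algorithm, below is a minimal PyTorch sketch of its core idea: a standard TD loss combined with a large-margin supervised loss that keeps demonstrated actions scored above all alternatives. The network names, margin value, and loss weight are illustrative assumptions, not the paper's implementation; full DQfD also uses n-step returns, prioritized replay, and L2 regularization, which this sketch omits.

```python
# Minimal, illustrative sketch of the DQfD loss referenced in the abstract.
# All names (q_net, target_net, MARGIN, LAMBDA_E) are hypothetical.
import torch
import torch.nn.functional as F

MARGIN = 0.8     # expert margin l(a_E, a); assumed hyperparameter
LAMBDA_E = 1.0   # weight of the supervised large-margin term

def dqfd_loss(q_net, target_net, batch, gamma=0.99):
    """One-step TD loss plus the large-margin loss on demonstration actions."""
    s, a, r, s_next, done, is_demo = batch  # is_demo: 1.0 for expert transitions

    q = q_net(s)                                   # Q(s, .), shape (B, num_actions)
    q_sa = q.gather(1, a.unsqueeze(1)).squeeze(1)  # Q(s, a)

    # Standard one-step TD target using a frozen target network.
    with torch.no_grad():
        target = r + gamma * (1 - done) * target_net(s_next).max(dim=1).values
    td_loss = F.smooth_l1_loss(q_sa, target)

    # Large-margin term: push Q(s, a_E) above every other action by MARGIN.
    # The margin is zeroed for the expert action, so the term is zero exactly
    # when the expert action is already the (margin-adjusted) argmax.
    margin = torch.full_like(q, MARGIN)
    margin.scatter_(1, a.unsqueeze(1), 0.0)
    margin_loss = ((q + margin).max(dim=1).values - q_sa) * is_demo

    return td_loss + LAMBDA_E * margin_loss.mean()
```

The margin term is what lets demonstration data shape the Q-function before any environment reward is observed; the paper's Reinforced Fine-tune Learning is described in the abstract as an extension of this algorithm that addresses the domain gap between the demonstration datasets and the environment.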