@inproceedings{saito-2018-curriculum,
title = "Curriculum Learning Based on Reward Sparseness for Deep Reinforcement Learning of Task Completion Dialogue Management",
author = "Saito, Atsushi",
editor = "Chuklin, Aleksandr and
Dalton, Jeff and
Kiseleva, Julia and
Borisov, Alexey and
Burtsev, Mikhail",
booktitle = "Proceedings of the 2018 {EMNLP} Workshop {SCAI}: The 2nd International Workshop on Search-Oriented Conversational {AI}",
month = oct,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-5707",
doi = "10.18653/v1/W18-5707",
pages = "46--51",
abstract = "Learning from sparse and delayed reward is a central issue in reinforcement learning. In this paper, to tackle reward sparseness problem of task oriented dialogue management, we propose a curriculum based approach on the number of slots of user goals. This curriculum makes it possible to learn dialogue management for sets of user goals with large number of slots. We also propose a dialogue policy based on progressive neural networks whose modules with parameters are appended with previous parameters fixed as the curriculum proceeds, and this policy improves performances over the one with single set of parameters.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="saito-2018-curriculum">
<titleInfo>
<title>Curriculum Learning Based on Reward Sparseness for Deep Reinforcement Learning of Task Completion Dialogue Management</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atsushi</namePart>
<namePart type="family">Saito</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2018 EMNLP Workshop SCAI: The 2nd International Workshop on Search-Oriented Conversational AI</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aleksandr</namePart>
<namePart type="family">Chuklin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeff</namePart>
<namePart type="family">Dalton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Kiseleva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexey</namePart>
<namePart type="family">Borisov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikhail</namePart>
<namePart type="family">Burtsev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Learning from sparse and delayed reward is a central issue in reinforcement learning. In this paper, to tackle reward sparseness problem of task oriented dialogue management, we propose a curriculum based approach on the number of slots of user goals. This curriculum makes it possible to learn dialogue management for sets of user goals with large number of slots. We also propose a dialogue policy based on progressive neural networks whose modules with parameters are appended with previous parameters fixed as the curriculum proceeds, and this policy improves performances over the one with single set of parameters.</abstract>
<identifier type="citekey">saito-2018-curriculum</identifier>
<identifier type="doi">10.18653/v1/W18-5707</identifier>
<location>
<url>https://aclanthology.org/W18-5707</url>
</location>
<part>
<date>2018-10</date>
<extent unit="page">
<start>46</start>
<end>51</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Curriculum Learning Based on Reward Sparseness for Deep Reinforcement Learning of Task Completion Dialogue Management
%A Saito, Atsushi
%Y Chuklin, Aleksandr
%Y Dalton, Jeff
%Y Kiseleva, Julia
%Y Borisov, Alexey
%Y Burtsev, Mikhail
%S Proceedings of the 2018 EMNLP Workshop SCAI: The 2nd International Workshop on Search-Oriented Conversational AI
%D 2018
%8 October
%I Association for Computational Linguistics
%C Brussels, Belgium
%F saito-2018-curriculum
%X Learning from sparse and delayed reward is a central issue in reinforcement learning. In this paper, to tackle reward sparseness problem of task oriented dialogue management, we propose a curriculum based approach on the number of slots of user goals. This curriculum makes it possible to learn dialogue management for sets of user goals with large number of slots. We also propose a dialogue policy based on progressive neural networks whose modules with parameters are appended with previous parameters fixed as the curriculum proceeds, and this policy improves performances over the one with single set of parameters.
%R 10.18653/v1/W18-5707
%U https://aclanthology.org/W18-5707
%U https://doi.org/10.18653/v1/W18-5707
%P 46-51
Markdown (Informal)
[Curriculum Learning Based on Reward Sparseness for Deep Reinforcement Learning of Task Completion Dialogue Management](https://aclanthology.org/W18-5707) (Saito, EMNLP 2018)
ACL