@inproceedings{chen-etal-2017-agent,
title = "Agent-Aware Dropout {DQN} for Safe and Efficient On-line Dialogue Policy Learning",
author = "Chen, Lu and
Zhou, Xiang and
Chang, Cheng and
Yang, Runzhe and
Yu, Kai",
editor = "Palmer, Martha and
Hwa, Rebecca and
Riedel, Sebastian",
booktitle = "Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D17-1260",
doi = "10.18653/v1/D17-1260",
pages = "2454--2464",
abstract = "Hand-crafted rules and reinforcement learning (RL) are two popular choices to obtain dialogue policy. The rule-based policy is often reliable within predefined scope but not self-adaptable, whereas RL is evolvable with data but often suffers from a bad initial performance. We employ a \textit{companion learning} framework to integrate the two approaches for \textit{on-line} dialogue policy learning, in which a pre-defined rule-based policy acts as a {``}teacher{''} and guides a data-driven RL system by giving example actions as well as additional rewards. A novel \textit{agent-aware dropout} Deep Q-Network (AAD-DQN) is proposed to address the problem of when to consult the teacher and how to learn from the teacher{'}s experiences. AAD-DQN, as a data-driven student policy, provides (1) two separate experience memories for student and teacher, (2) an uncertainty estimated by dropout to control the timing of consultation and learning. Simulation experiments showed that the proposed approach can significantly improve both \textit{safety}and \textit{efficiency} of on-line policy optimization compared to other companion learning approaches as well as supervised pre-training using static dialogue corpus.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2017-agent">
<titleInfo>
<title>Agent-Aware Dropout DQN for Safe and Efficient On-line Dialogue Policy Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Runzhe</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Martha</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Hwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Riedel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Hand-crafted rules and reinforcement learning (RL) are two popular choices to obtain dialogue policy. The rule-based policy is often reliable within predefined scope but not self-adaptable, whereas RL is evolvable with data but often suffers from a bad initial performance. We employ a companion learning framework to integrate the two approaches for on-line dialogue policy learning, in which a pre-defined rule-based policy acts as a “teacher” and guides a data-driven RL system by giving example actions as well as additional rewards. A novel agent-aware dropout Deep Q-Network (AAD-DQN) is proposed to address the problem of when to consult the teacher and how to learn from the teacher’s experiences. AAD-DQN, as a data-driven student policy, provides (1) two separate experience memories for student and teacher, (2) an uncertainty estimated by dropout to control the timing of consultation and learning. Simulation experiments showed that the proposed approach can significantly improve both safetyand efficiency of on-line policy optimization compared to other companion learning approaches as well as supervised pre-training using static dialogue corpus.</abstract>
<identifier type="citekey">chen-etal-2017-agent</identifier>
<identifier type="doi">10.18653/v1/D17-1260</identifier>
<location>
<url>https://aclanthology.org/D17-1260</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>2454</start>
<end>2464</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Agent-Aware Dropout DQN for Safe and Efficient On-line Dialogue Policy Learning
%A Chen, Lu
%A Zhou, Xiang
%A Chang, Cheng
%A Yang, Runzhe
%A Yu, Kai
%Y Palmer, Martha
%Y Hwa, Rebecca
%Y Riedel, Sebastian
%S Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F chen-etal-2017-agent
%X Hand-crafted rules and reinforcement learning (RL) are two popular choices to obtain dialogue policy. The rule-based policy is often reliable within predefined scope but not self-adaptable, whereas RL is evolvable with data but often suffers from a bad initial performance. We employ a companion learning framework to integrate the two approaches for on-line dialogue policy learning, in which a pre-defined rule-based policy acts as a “teacher” and guides a data-driven RL system by giving example actions as well as additional rewards. A novel agent-aware dropout Deep Q-Network (AAD-DQN) is proposed to address the problem of when to consult the teacher and how to learn from the teacher’s experiences. AAD-DQN, as a data-driven student policy, provides (1) two separate experience memories for student and teacher, (2) an uncertainty estimated by dropout to control the timing of consultation and learning. Simulation experiments showed that the proposed approach can significantly improve both safetyand efficiency of on-line policy optimization compared to other companion learning approaches as well as supervised pre-training using static dialogue corpus.
%R 10.18653/v1/D17-1260
%U https://aclanthology.org/D17-1260
%U https://doi.org/10.18653/v1/D17-1260
%P 2454-2464
Markdown (Informal)
[Agent-Aware Dropout DQN for Safe and Efficient On-line Dialogue Policy Learning](https://aclanthology.org/D17-1260) (Chen et al., EMNLP 2017)
ACL