@inproceedings{ultes-2019-improving,
title = "Improving Interaction Quality Estimation with {B}i{LSTM}s and the Impact on Dialogue Policy Learning",
author = "Ultes, Stefan",
editor = "Nakamura, Satoshi and
Gasic, Milica and
Zukerman, Ingrid and
Skantze, Gabriel and
Nakano, Mikio and
Papangelis, Alexandros and
Ultes, Stefan and
Yoshino, Koichiro",
booktitle = "Proceedings of the 20th Annual SIGdial Meeting on Discourse and Dialogue",
month = sep,
year = "2019",
address = "Stockholm, Sweden",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-5902/",
doi = "10.18653/v1/W19-5902",
pages = "11--20",
abstract = "Learning suitable and well-performing dialogue behaviour in statistical spoken dialogue systems has been in the focus of research for many years. While most work which is based on reinforcement learning employs an objective measure like task success for modelling the reward signal, we use a reward based on user satisfaction estimation. We propose a novel estimator and show that it outperforms all previous estimators while learning temporal dependencies implicitly. Furthermore, we apply this novel user satisfaction estimation model live in simulated experiments where the satisfaction estimation model is trained on one domain and applied in many other domains which cover a similar task. We show that applying this model results in higher estimated satisfaction, similar task success rates and a higher robustness to noise."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ultes-2019-improving">
<titleInfo>
<title>Improving Interaction Quality Estimation with BiLSTMs and the Impact on Dialogue Policy Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Ultes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th Annual SIGdial Meeting on Discourse and Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Satoshi</namePart>
<namePart type="family">Nakamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Milica</namePart>
<namePart type="family">Gasic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ingrid</namePart>
<namePart type="family">Zukerman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Skantze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikio</namePart>
<namePart type="family">Nakano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandros</namePart>
<namePart type="family">Papangelis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Ultes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koichiro</namePart>
<namePart type="family">Yoshino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Stockholm, Sweden</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Learning suitable and well-performing dialogue behaviour in statistical spoken dialogue systems has been in the focus of research for many years. While most work which is based on reinforcement learning employs an objective measure like task success for modelling the reward signal, we use a reward based on user satisfaction estimation. We propose a novel estimator and show that it outperforms all previous estimators while learning temporal dependencies implicitly. Furthermore, we apply this novel user satisfaction estimation model live in simulated experiments where the satisfaction estimation model is trained on one domain and applied in many other domains which cover a similar task. We show that applying this model results in higher estimated satisfaction, similar task success rates and a higher robustness to noise.</abstract>
<identifier type="citekey">ultes-2019-improving</identifier>
<identifier type="doi">10.18653/v1/W19-5902</identifier>
<location>
<url>https://aclanthology.org/W19-5902/</url>
</location>
<part>
<date>2019-09</date>
<extent unit="page">
<start>11</start>
<end>20</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving Interaction Quality Estimation with BiLSTMs and the Impact on Dialogue Policy Learning
%A Ultes, Stefan
%Y Nakamura, Satoshi
%Y Gasic, Milica
%Y Zukerman, Ingrid
%Y Skantze, Gabriel
%Y Nakano, Mikio
%Y Papangelis, Alexandros
%Y Ultes, Stefan
%Y Yoshino, Koichiro
%S Proceedings of the 20th Annual SIGdial Meeting on Discourse and Dialogue
%D 2019
%8 September
%I Association for Computational Linguistics
%C Stockholm, Sweden
%F ultes-2019-improving
%X Learning suitable and well-performing dialogue behaviour in statistical spoken dialogue systems has been in the focus of research for many years. While most work which is based on reinforcement learning employs an objective measure like task success for modelling the reward signal, we use a reward based on user satisfaction estimation. We propose a novel estimator and show that it outperforms all previous estimators while learning temporal dependencies implicitly. Furthermore, we apply this novel user satisfaction estimation model live in simulated experiments where the satisfaction estimation model is trained on one domain and applied in many other domains which cover a similar task. We show that applying this model results in higher estimated satisfaction, similar task success rates and a higher robustness to noise.
%R 10.18653/v1/W19-5902
%U https://aclanthology.org/W19-5902/
%U https://doi.org/10.18653/v1/W19-5902
%P 11-20
Markdown (Informal)
[Improving Interaction Quality Estimation with BiLSTMs and the Impact on Dialogue Policy Learning](https://aclanthology.org/W19-5902/) (Ultes, SIGDIAL 2019)
ACL