@article{ultes-maier-2021-user,
    title = "User Satisfaction Reward Estimation Across Domains: Domain-independent Dialogue Policy Learning",
    author = "Ultes, Stefan and
      Maier, Wolfgang",
    editor = "Healey, Patrick and
      Di Eugenio, Barbara and
      Demberg, Vera and
      Ginzburg, Jonathan and
      Georgila, Kallirroi and
      Zeldes, Amir and
      Poesio, Massimo",
    journal = "Dialogue {\&} Discourse",
    volume = "12",
    month = sep,
    year = "2021",
    address = "Chicago, Illinois, USA",
    publisher = "University of Illinois Chicago",
    url = "https://aclanthology.org/2021.dnd-12.5/",
    doi = "10.5210/dad.2021.203",
    pages = "81--114",
    abstract = "Learning suitable and well-performing dialogue behaviour in statistical spoken dialogue systems has been in the focus of research for many years. While most work that is based on reinforcement learning employs an objective measure like task success for modelling the reward signal, we propose to use a reward signal based on user satisfaction. We propose a novel estimator and show that it outperforms all previous estimators while learning temporal dependencies implicitly. We show in simulated experiments that a live user satisfaction estimation model may be applied resulting in higher estimated satisfaction whilst achieving similar success rates. Moreover, we show that a satisfaction estimation model trained on one domain may be applied in many other domains that cover a similar task. We verify our findings by employing the model to one of the domains for learning a policy from real users and compare its performance to policies using user satisfaction and task success acquired directly from the users as reward."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ultes-maier-2021-user">
<titleInfo>
<title>User Satisfaction Reward Estimation Across Domains: Domain-independent Dialogue Policy Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Ultes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wolfgang</namePart>
<namePart type="family">Maier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Dialogue &amp; Discourse</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>University of Illinois Chicago</publisher>
<place>
<placeTerm type="text">Chicago, Illinois, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Learning suitable and well-performing dialogue behaviour in statistical spoken dialogue systems has been in the focus of research for many years. While most work that is based on reinforcement learning employs an objective measure like task success for modelling the reward signal, we propose to use a reward signal based on user satisfaction. We propose a novel estimator and show that it outperforms all previous estimators while learning temporal dependencies implicitly. We show in simulated experiments that a live user satisfaction estimation model may be applied resulting in higher estimated satisfaction whilst achieving similar success rates. Moreover, we show that a satisfaction estimation model trained on one domain may be applied in many other domains that cover a similar task. We verify our findings by employing the model to one of the domains for learning a policy from real users and compare its performance to policies using user satisfaction and task success acquired directly from the users as reward.</abstract>
<identifier type="citekey">ultes-maier-2021-user</identifier>
<identifier type="doi">10.5210/dad.2021.203</identifier>
<location>
<url>https://aclanthology.org/2021.dnd-12.5/</url>
</location>
<part>
<date>2021-09</date>
<detail type="volume"><number>12</number></detail>
<extent unit="page">
<start>81</start>
<end>114</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T User Satisfaction Reward Estimation Across Domains: Domain-independent Dialogue Policy Learning
%A Ultes, Stefan
%A Maier, Wolfgang
%J Dialogue & Discourse
%D 2021
%8 September
%V 12
%I University of Illinois Chicago
%C Chicago, Illinois, USA
%F ultes-maier-2021-user
%X Learning suitable and well-performing dialogue behaviour in statistical spoken dialogue systems has been in the focus of research for many years. While most work that is based on reinforcement learning employs an objective measure like task success for modelling the reward signal, we propose to use a reward signal based on user satisfaction. We propose a novel estimator and show that it outperforms all previous estimators while learning temporal dependencies implicitly. We show in simulated experiments that a live user satisfaction estimation model may be applied resulting in higher estimated satisfaction whilst achieving similar success rates. Moreover, we show that a satisfaction estimation model trained on one domain may be applied in many other domains that cover a similar task. We verify our findings by employing the model to one of the domains for learning a policy from real users and compare its performance to policies using user satisfaction and task success acquired directly from the users as reward.
%R 10.5210/dad.2021.203
%U https://aclanthology.org/2021.dnd-12.5/
%U https://doi.org/10.5210/dad.2021.203
%P 81-114
Markdown (Informal)
[User Satisfaction Reward Estimation Across Domains: Domain-independent Dialogue Policy Learning](https://aclanthology.org/2021.dnd-12.5/) (Ultes & Maier, DND 2021)
ACL