% Conference paper in Findings of the ACL: EMNLP 2021 (ACL Anthology ID 2021.findings-emnlp.295).
@inproceedings{shi-etal-2021-refine-imitate,
  title     = {Refine and Imitate: Reducing Repetition and Inconsistency in Persuasion Dialogues via Reinforcement Learning and Human Demonstration},
  author    = {Shi, Weiyan and Li, Yu and Sahay, Saurav and Yu, Zhou},
  editor    = {Moens, Marie-Francine and Huang, Xuanjing and Specia, Lucia and Yih, Scott Wen-tau},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
  month     = nov,
  year      = {2021},
  address   = {Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.findings-emnlp.295},
  doi       = {10.18653/v1/2021.findings-emnlp.295},
  pages     = {3478--3492},
  abstract  = {Persuasion dialogue system reflects the machine{'}s ability to make strategic moves beyond verbal communication, and therefore differentiates itself from task-oriented or open-domain dialogues and has its own unique values. However, the repetition and inconsistency problems still persist in dialogue response generation and could substantially impact user experience and impede the persuasion outcome. Besides, although reinforcement learning (RL) approaches have achieved big success in strategic tasks such as games, it requires a sophisticated user simulator to provide real-time feedback to the dialogue system, which limits the application of RL on persuasion dialogues. To address these issues towards a better persuasion dialogue system, we apply RL to refine a language model baseline without user simulators, and distill sentence-level information about repetition, inconsistency, and task relevance through rewards. Moreover, to better accomplish the persuasion task, the model learns from human demonstration to imitate human persuasion behavior and selects the most persuasive responses. Experiments show that our model outperforms previous state-of-the-art dialogue models on both automatic metrics and human evaluation results on a donation persuasion task, and generates more diverse, consistent and persuasive conversations according to the user feedback. We will make the code and model publicly available.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="shi-etal-2021-refine-imitate">
    <titleInfo>
      <title>Refine and Imitate: Reducing Repetition and Inconsistency in Persuasion Dialogues via Reinforcement Learning and Human Demonstration</title>
    </titleInfo>
    <!-- Authors of the paper, one name element each. -->
    <name type="personal">
      <namePart type="given">Weiyan</namePart>
      <namePart type="family">Shi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yu</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Saurav</namePart>
      <namePart type="family">Sahay</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhou</namePart>
      <namePart type="family">Yu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2021</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marie-Francine</namePart>
        <namePart type="family">Moens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Xuanjing</namePart>
        <namePart type="family">Huang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lucia</namePart>
        <namePart type="family">Specia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Scott</namePart>
        <namePart type="given">Wen-tau</namePart>
        <namePart type="family">Yih</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Persuasion dialogue system reflects the machine’s ability to make strategic moves beyond verbal communication, and therefore differentiates itself from task-oriented or open-domain dialogues and has its own unique values. However, the repetition and inconsistency problems still persist in dialogue response generation and could substantially impact user experience and impede the persuasion outcome. Besides, although reinforcement learning (RL) approaches have achieved big success in strategic tasks such as games, it requires a sophisticated user simulator to provide real-time feedback to the dialogue system, which limits the application of RL on persuasion dialogues. To address these issues towards a better persuasion dialogue system, we apply RL to refine a language model baseline without user simulators, and distill sentence-level information about repetition, inconsistency, and task relevance through rewards. Moreover, to better accomplish the persuasion task, the model learns from human demonstration to imitate human persuasion behavior and selects the most persuasive responses. Experiments show that our model outperforms previous state-of-the-art dialogue models on both automatic metrics and human evaluation results on a donation persuasion task, and generates more diverse, consistent and persuasive conversations according to the user feedback. We will make the code and model publicly available.</abstract>
    <identifier type="citekey">shi-etal-2021-refine-imitate</identifier>
    <identifier type="doi">10.18653/v1/2021.findings-emnlp.295</identifier>
    <location>
      <url>https://aclanthology.org/2021.findings-emnlp.295</url>
    </location>
    <!-- Issue date and page extent of the paper. -->
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>3478</start>
        <end>3492</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Refine and Imitate: Reducing Repetition and Inconsistency in Persuasion Dialogues via Reinforcement Learning and Human Demonstration
%A Shi, Weiyan
%A Li, Yu
%A Sahay, Saurav
%A Yu, Zhou
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Findings of the Association for Computational Linguistics: EMNLP 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F shi-etal-2021-refine-imitate
%X Persuasion dialogue system reflects the machine’s ability to make strategic moves beyond verbal communication, and therefore differentiates itself from task-oriented or open-domain dialogues and has its own unique values. However, the repetition and inconsistency problems still persist in dialogue response generation and could substantially impact user experience and impede the persuasion outcome. Besides, although reinforcement learning (RL) approaches have achieved big success in strategic tasks such as games, it requires a sophisticated user simulator to provide real-time feedback to the dialogue system, which limits the application of RL on persuasion dialogues. To address these issues towards a better persuasion dialogue system, we apply RL to refine a language model baseline without user simulators, and distill sentence-level information about repetition, inconsistency, and task relevance through rewards. Moreover, to better accomplish the persuasion task, the model learns from human demonstration to imitate human persuasion behavior and selects the most persuasive responses. Experiments show that our model outperforms previous state-of-the-art dialogue models on both automatic metrics and human evaluation results on a donation persuasion task, and generates more diverse, consistent and persuasive conversations according to the user feedback. We will make the code and model publicly available.
%R 10.18653/v1/2021.findings-emnlp.295
%U https://aclanthology.org/2021.findings-emnlp.295
%U https://doi.org/10.18653/v1/2021.findings-emnlp.295
%P 3478-3492
Markdown (Informal)
[Refine and Imitate: Reducing Repetition and Inconsistency in Persuasion Dialogues via Reinforcement Learning and Human Demonstration](https://aclanthology.org/2021.findings-emnlp.295) (Shi et al., Findings 2021)
ACL