@inproceedings{cheng-etal-2022-multiwoz,
title = "Is {M}ulti{WOZ} a Solved Task? An Interactive {TOD} Evaluation Framework with User Simulator",
author = "Cheng, Qinyuan and
Li, Linyang and
Quan, Guofeng and
Gao, Feng and
Mou, Xiaofeng and
Qiu, Xipeng",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.90",
doi = "10.18653/v1/2022.findings-emnlp.90",
pages = "1248--1259",
abstract = "Task-Oriented Dialogue (TOD) systems are drawing more and more attention in recent studies.Current methods focus on constructing pre-trained models or fine-tuning strategies while the evaluation of TOD is limited by a policy mismatch problem.That is, during evaluation, the user utterances are from the annotated dataset while these utterances should interact with previous responses which can have many alternatives besides annotated texts.Therefore, in this work, we propose an interactive evaluation framework for TOD. We first build a goal-oriented user simulator based on pre-trained models and then use the user simulator to interact with the dialogue system to generate dialogues.Besides, we introduce a sentence-level and a session-level score to measure the sentence fluency and session coherence in the interactive evaluation. Experimental results show that RL-based TOD systems trained by our proposed user simulator can achieve nearly 98{\%} inform and success rates in the interactive evaluation of MultiWOZ dataset and the proposed scores measure the response quality besides the inform and success rates.We are hoping that our work will encourage simulator-based interactive evaluations in the TOD task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cheng-etal-2022-multiwoz">
<titleInfo>
<title>Is MultiWOZ a Solved Task? An Interactive TOD Evaluation Framework with User Simulator</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qinyuan</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Linyang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guofeng</namePart>
<namePart type="family">Quan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Feng</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaofeng</namePart>
<namePart type="family">Mou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xipeng</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Task-Oriented Dialogue (TOD) systems are drawing more and more attention in recent studies.Current methods focus on constructing pre-trained models or fine-tuning strategies while the evaluation of TOD is limited by a policy mismatch problem.That is, during evaluation, the user utterances are from the annotated dataset while these utterances should interact with previous responses which can have many alternatives besides annotated texts.Therefore, in this work, we propose an interactive evaluation framework for TOD. We first build a goal-oriented user simulator based on pre-trained models and then use the user simulator to interact with the dialogue system to generate dialogues.Besides, we introduce a sentence-level and a session-level score to measure the sentence fluency and session coherence in the interactive evaluation. Experimental results show that RL-based TOD systems trained by our proposed user simulator can achieve nearly 98% inform and success rates in the interactive evaluation of MultiWOZ dataset and the proposed scores measure the response quality besides the inform and success rates.We are hoping that our work will encourage simulator-based interactive evaluations in the TOD task.</abstract>
<identifier type="citekey">cheng-etal-2022-multiwoz</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.90</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.90</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>1248</start>
<end>1259</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Is MultiWOZ a Solved Task? An Interactive TOD Evaluation Framework with User Simulator
%A Cheng, Qinyuan
%A Li, Linyang
%A Quan, Guofeng
%A Gao, Feng
%A Mou, Xiaofeng
%A Qiu, Xipeng
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F cheng-etal-2022-multiwoz
%X Task-Oriented Dialogue (TOD) systems are drawing more and more attention in recent studies.Current methods focus on constructing pre-trained models or fine-tuning strategies while the evaluation of TOD is limited by a policy mismatch problem.That is, during evaluation, the user utterances are from the annotated dataset while these utterances should interact with previous responses which can have many alternatives besides annotated texts.Therefore, in this work, we propose an interactive evaluation framework for TOD. We first build a goal-oriented user simulator based on pre-trained models and then use the user simulator to interact with the dialogue system to generate dialogues.Besides, we introduce a sentence-level and a session-level score to measure the sentence fluency and session coherence in the interactive evaluation. Experimental results show that RL-based TOD systems trained by our proposed user simulator can achieve nearly 98% inform and success rates in the interactive evaluation of MultiWOZ dataset and the proposed scores measure the response quality besides the inform and success rates.We are hoping that our work will encourage simulator-based interactive evaluations in the TOD task.
%R 10.18653/v1/2022.findings-emnlp.90
%U https://aclanthology.org/2022.findings-emnlp.90
%U https://doi.org/10.18653/v1/2022.findings-emnlp.90
%P 1248-1259
Markdown (Informal)
[Is MultiWOZ a Solved Task? An Interactive TOD Evaluation Framework with User Simulator](https://aclanthology.org/2022.findings-emnlp.90) (Cheng et al., Findings 2022)
ACL