@inproceedings{oshima-etal-2024-gap,
title = "The Gap in the Strategy of Recovering Task Failure between {GPT}-4{V} and Humans in a Visual Dialogue",
author = "Oshima, Ryosuke and
Shinagawa, Seitaro and
Morishima, Shigeo",
editor = "Kawahara, Tatsuya and
Demberg, Vera and
Ultes, Stefan and
Inoue, Koji and
Mehri, Shikib and
Howcroft, David and
Komatani, Kazunori",
booktitle = "Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue",
month = sep,
year = "2024",
address = "Kyoto, Japan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.sigdial-1.62",
pages = "728--745",
abstract = "Goal-oriented dialogue systems interact with humans to accomplish specific tasks. However, sometimes these systems fail to establish a common ground with users, leading to task failures. In such cases, it is crucial not to just end with failure but to correct and recover the dialogue to turn it into a success for building a robust goal-oriented dialogue system. Effective recovery from task failures in a goal-oriented dialogue involves not only successful recovery but also accurately understanding the situation of the failed task to minimize unnecessary interactions and avoid frustrating the user. In this study, we analyze the capabilities of GPT-4V in recovering failure tasks by comparing its performance with that of humans using Guess What?! Game. The results show that GPT-4V employs less efficient recovery strategies, such as asking additional unnecessary questions, than humans. We also found that while humans can occasionally ask questions that doubt the accuracy of the interlocutor{'}s answer during task recovery, GPT-4V lacks this capability.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="oshima-etal-2024-gap">
<titleInfo>
<title>The Gap in the Strategy of Recovering Task Failure between GPT-4V and Humans in a Visual Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ryosuke</namePart>
<namePart type="family">Oshima</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seitaro</namePart>
<namePart type="family">Shinagawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shigeo</namePart>
<namePart type="family">Morishima</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tatsuya</namePart>
<namePart type="family">Kawahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Ultes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koji</namePart>
<namePart type="family">Inoue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shikib</namePart>
<namePart type="family">Mehri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Howcroft</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kazunori</namePart>
<namePart type="family">Komatani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Kyoto, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Goal-oriented dialogue systems interact with humans to accomplish specific tasks. However, sometimes these systems fail to establish a common ground with users, leading to task failures. In such cases, it is crucial not to just end with failure but to correct and recover the dialogue to turn it into a success for building a robust goal-oriented dialogue system. Effective recovery from task failures in a goal-oriented dialogue involves not only successful recovery but also accurately understanding the situation of the failed task to minimize unnecessary interactions and avoid frustrating the user. In this study, we analyze the capabilities of GPT-4V in recovering failure tasks by comparing its performance with that of humans using Guess What?! Game. The results show that GPT-4V employs less efficient recovery strategies, such as asking additional unnecessary questions, than humans. We also found that while humans can occasionally ask questions that doubt the accuracy of the interlocutor’s answer during task recovery, GPT-4V lacks this capability.</abstract>
<identifier type="citekey">oshima-etal-2024-gap</identifier>
<location>
<url>https://aclanthology.org/2024.sigdial-1.62</url>
</location>
<part>
<date>2024-09</date>
<extent unit="page">
<start>728</start>
<end>745</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Gap in the Strategy of Recovering Task Failure between GPT-4V and Humans in a Visual Dialogue
%A Oshima, Ryosuke
%A Shinagawa, Seitaro
%A Morishima, Shigeo
%Y Kawahara, Tatsuya
%Y Demberg, Vera
%Y Ultes, Stefan
%Y Inoue, Koji
%Y Mehri, Shikib
%Y Howcroft, David
%Y Komatani, Kazunori
%S Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2024
%8 September
%I Association for Computational Linguistics
%C Kyoto, Japan
%F oshima-etal-2024-gap
%X Goal-oriented dialogue systems interact with humans to accomplish specific tasks. However, sometimes these systems fail to establish a common ground with users, leading to task failures. In such cases, it is crucial not to just end with failure but to correct and recover the dialogue to turn it into a success for building a robust goal-oriented dialogue system. Effective recovery from task failures in a goal-oriented dialogue involves not only successful recovery but also accurately understanding the situation of the failed task to minimize unnecessary interactions and avoid frustrating the user. In this study, we analyze the capabilities of GPT-4V in recovering failure tasks by comparing its performance with that of humans using Guess What?! Game. The results show that GPT-4V employs less efficient recovery strategies, such as asking additional unnecessary questions, than humans. We also found that while humans can occasionally ask questions that doubt the accuracy of the interlocutor’s answer during task recovery, GPT-4V lacks this capability.
%U https://aclanthology.org/2024.sigdial-1.62
%P 728-745
Markdown (Informal)
[The Gap in the Strategy of Recovering Task Failure between GPT-4V and Humans in a Visual Dialogue](https://aclanthology.org/2024.sigdial-1.62) (Oshima et al., SIGDIAL 2024)
ACL