@inproceedings{hudecek-dusek-2023-large,
title = "Are Large Language Models All You Need for Task-Oriented Dialogue?",
author = "Hude{\v{c}}ek, Vojt{\v{e}}ch and
Dusek, Ondrej",
editor = "Stoyanchev, Svetlana and
Joty, Shafiq and
Schlangen, David and
Dusek, Ondrej and
Kennington, Casey and
Alikhani, Malihe",
booktitle = "Proceedings of the 24th Annual Meeting of the Special Interest Group on Discourse and Dialogue",
month = sep,
year = "2023",
address = "Prague, Czechia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.sigdial-1.21",
doi = "10.18653/v1/2023.sigdial-1.21",
pages = "216--228",
abstract = "Instruction-finetuned large language models (LLMs) gained a huge popularity recently, thanks to their ability to interact with users through conversation. In this work, we aim to evaluate their ability to complete multi-turn tasks and interact with external databases in the context of established task-oriented dialogue benchmarks. We show that in explicit belief state tracking, LLMs underperform compared to specialized task-specific models. Nevertheless, they show some ability to guide the dialogue to a successful ending through their generated responses if they are provided with correct slot values. Furthermore, this ability improves with few-shot in-domain examples.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hudecek-dusek-2023-large">
<titleInfo>
<title>Are Large Language Models All You Need for Task-Oriented Dialogue?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vojtěch</namePart>
<namePart type="family">Hudeček</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondrej</namePart>
<namePart type="family">Dusek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Svetlana</namePart>
<namePart type="family">Stoyanchev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shafiq</namePart>
<namePart type="family">Joty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Schlangen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondrej</namePart>
<namePart type="family">Dusek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Casey</namePart>
<namePart type="family">Kennington</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malihe</namePart>
<namePart type="family">Alikhani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Prague, Czechia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Instruction-finetuned large language models (LLMs) gained a huge popularity recently, thanks to their ability to interact with users through conversation. In this work, we aim to evaluate their ability to complete multi-turn tasks and interact with external databases in the context of established task-oriented dialogue benchmarks. We show that in explicit belief state tracking, LLMs underperform compared to specialized task-specific models. Nevertheless, they show some ability to guide the dialogue to a successful ending through their generated responses if they are provided with correct slot values. Furthermore, this ability improves with few-shot in-domain examples.</abstract>
<identifier type="citekey">hudecek-dusek-2023-large</identifier>
<identifier type="doi">10.18653/v1/2023.sigdial-1.21</identifier>
<location>
<url>https://aclanthology.org/2023.sigdial-1.21</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>216</start>
<end>228</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Are Large Language Models All You Need for Task-Oriented Dialogue?
%A Hudeček, Vojtěch
%A Dusek, Ondrej
%Y Stoyanchev, Svetlana
%Y Joty, Shafiq
%Y Schlangen, David
%Y Dusek, Ondrej
%Y Kennington, Casey
%Y Alikhani, Malihe
%S Proceedings of the 24th Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2023
%8 September
%I Association for Computational Linguistics
%C Prague, Czechia
%F hudecek-dusek-2023-large
%X Instruction-finetuned large language models (LLMs) gained a huge popularity recently, thanks to their ability to interact with users through conversation. In this work, we aim to evaluate their ability to complete multi-turn tasks and interact with external databases in the context of established task-oriented dialogue benchmarks. We show that in explicit belief state tracking, LLMs underperform compared to specialized task-specific models. Nevertheless, they show some ability to guide the dialogue to a successful ending through their generated responses if they are provided with correct slot values. Furthermore, this ability improves with few-shot in-domain examples.
%R 10.18653/v1/2023.sigdial-1.21
%U https://aclanthology.org/2023.sigdial-1.21
%U https://doi.org/10.18653/v1/2023.sigdial-1.21
%P 216-228
Markdown (Informal)
[Are Large Language Models All You Need for Task-Oriented Dialogue?](https://aclanthology.org/2023.sigdial-1.21) (Hudeček & Dusek, SIGDIAL 2023)
ACL