@inproceedings{dogruoz-skantze-2021-open,
title = "How {``}open{''} are the conversations with open-domain chatbots? A proposal for Speech Event based evaluation",
author = {Do{\u{g}}ru{\"o}z, A. Seza and
Skantze, Gabriel},
editor = "Li, Haizhou and
Levow, Gina-Anne and
Yu, Zhou and
Gupta, Chitralekha and
Sisman, Berrak and
Cai, Siqi and
Vandyke, David and
Dethlefs, Nina and
Wu, Yan and
Li, Junyi Jessy",
booktitle = "Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue",
month = jul,
year = "2021",
address = "Singapore and Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.sigdial-1.41",
doi = "10.18653/v1/2021.sigdial-1.41",
pages = "392--402",
abstract = "Open-domain chatbots are supposed to converse freely with humans without being restricted to a topic, task or domain. However, the boundaries and/or contents of open-domain conversations are not clear. To clarify the boundaries of {``}openness{''}, we conduct two studies: First, we classify the types of {``}speech events{''} encountered in a chatbot evaluation data set (i.e., Meena by Google) and find that these conversations mainly cover the {``}small talk{''} category and exclude the other speech event categories encountered in real life human-human communication. Second, we conduct a small-scale pilot study to generate online conversations covering a wider range of speech event categories between two humans vs. a human and a state-of-the-art chatbot (i.e., Blender by Facebook). A human evaluation of these generated conversations indicates a preference for human-human conversations, since the human-chatbot conversations lack coherence in most speech event categories. Based on these results, we suggest (a) using the term {``}small talk{''} instead of {``}open-domain{''} for the current chatbots which are not that {``}open{''} in terms of conversational abilities yet, and (b) revising the evaluation methods to test the chatbot conversations against other speech events.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dogruoz-skantze-2021-open">
<titleInfo>
<title>How “open” are the conversations with open-domain chatbots? A proposal for Speech Event based evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Skantze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Haizhou</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gina-Anne</namePart>
<namePart type="family">Levow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhou</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chitralekha</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Berrak</namePart>
<namePart type="family">Sisman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siqi</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Vandyke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nina</namePart>
<namePart type="family">Dethlefs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yan</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junyi</namePart>
<namePart type="given">Jessy</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore and Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Open-domain chatbots are supposed to converse freely with humans without being restricted to a topic, task or domain. However, the boundaries and/or contents of open-domain conversations are not clear. To clarify the boundaries of “openness”, we conduct two studies: First, we classify the types of “speech events” encountered in a chatbot evaluation data set (i.e., Meena by Google) and find that these conversations mainly cover the “small talk” category and exclude the other speech event categories encountered in real life human-human communication. Second, we conduct a small-scale pilot study to generate online conversations covering a wider range of speech event categories between two humans vs. a human and a state-of-the-art chatbot (i.e., Blender by Facebook). A human evaluation of these generated conversations indicates a preference for human-human conversations, since the human-chatbot conversations lack coherence in most speech event categories. Based on these results, we suggest (a) using the term “small talk” instead of “open-domain” for the current chatbots which are not that “open” in terms of conversational abilities yet, and (b) revising the evaluation methods to test the chatbot conversations against other speech events.</abstract>
<identifier type="citekey">dogruoz-skantze-2021-open</identifier>
<identifier type="doi">10.18653/v1/2021.sigdial-1.41</identifier>
<location>
<url>https://aclanthology.org/2021.sigdial-1.41</url>
</location>
<part>
<date>2021-07</date>
<extent unit="page">
<start>392</start>
<end>402</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How “open” are the conversations with open-domain chatbots? A proposal for Speech Event based evaluation
%A Doğruöz, A. Seza
%A Skantze, Gabriel
%Y Li, Haizhou
%Y Levow, Gina-Anne
%Y Yu, Zhou
%Y Gupta, Chitralekha
%Y Sisman, Berrak
%Y Cai, Siqi
%Y Vandyke, David
%Y Dethlefs, Nina
%Y Wu, Yan
%Y Li, Junyi Jessy
%S Proceedings of the 22nd Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2021
%8 July
%I Association for Computational Linguistics
%C Singapore and Online
%F dogruoz-skantze-2021-open
%X Open-domain chatbots are supposed to converse freely with humans without being restricted to a topic, task or domain. However, the boundaries and/or contents of open-domain conversations are not clear. To clarify the boundaries of “openness”, we conduct two studies: First, we classify the types of “speech events” encountered in a chatbot evaluation data set (i.e., Meena by Google) and find that these conversations mainly cover the “small talk” category and exclude the other speech event categories encountered in real life human-human communication. Second, we conduct a small-scale pilot study to generate online conversations covering a wider range of speech event categories between two humans vs. a human and a state-of-the-art chatbot (i.e., Blender by Facebook). A human evaluation of these generated conversations indicates a preference for human-human conversations, since the human-chatbot conversations lack coherence in most speech event categories. Based on these results, we suggest (a) using the term “small talk” instead of “open-domain” for the current chatbots which are not that “open” in terms of conversational abilities yet, and (b) revising the evaluation methods to test the chatbot conversations against other speech events.
%R 10.18653/v1/2021.sigdial-1.41
%U https://aclanthology.org/2021.sigdial-1.41
%U https://doi.org/10.18653/v1/2021.sigdial-1.41
%P 392-402
Markdown (Informal)
[How “open” are the conversations with open-domain chatbots? A proposal for Speech Event based evaluation](https://aclanthology.org/2021.sigdial-1.41) (Doğruöz & Skantze, SIGDIAL 2021)
ACL