@comment{
  Conference paper record, citation key luu-2022-towards.
  Words in title and booktitle that must keep their capitals are
  brace-protected as whole words, so sentence-casing styles leave them intact.
  NOTE(review): the author family name is stored as raw UTF-8 while one editor
  name uses a LaTeX accent escape; confirm which encoding the target toolchain
  (8-bit BibTeX vs. Biber) expects before normalising either one.
}
@inproceedings{luu-2022-towards,
  title     = {Towards Human Evaluation of Mutual Understanding in Human-Computer Spontaneous Conversation: An Empirical Study of Word Sense Disambiguation for Naturalistic Social Dialogs in {American} {English}},
  author    = {Lưu, Alex},
  editor    = {Belz, Anya and
               Popovi{\'c}, Maja and
               Reiter, Ehud and
               Shimorina, Anastasia},
  booktitle = {Proceedings of the 2nd Workshop on Human Evaluation of {NLP} Systems ({HumEval})},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.humeval-1.10},
  doi       = {10.18653/v1/2022.humeval-1.10},
  pages     = {116--125},
  abstract  = {Current evaluation practices for social dialog systems, dedicated to human-computer spontaneous conversation, exclusively focus on the quality of system-generated surface text, but not human-verifiable aspects of mutual understanding between the systems and their interlocutors. This work proposes Word Sense Disambiguation (WSD) as an essential component of a valid and reliable human evaluation framework, whose long-term goal is to radically improve the usability of dialog systems in real-life human-computer collaboration. The practicality of this proposal is proved via experimentally investigating (1) the WordNet 3.0 sense inventory coverage of lexical meanings in spontaneous conversation between humans in American English, assumed as an upper bound of lexical diversity of human-computer communication, and (2) the effectiveness of state-of-the-art WSD models and pretrained transformer-based contextual embeddings on this type of data.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single bibliographic record (ID luu-2022-towards). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="luu-2022-towards">
<!-- Title of the paper itself. -->
<titleInfo>
<title>Towards Human Evaluation of Mutual Understanding in Human-Computer Spontaneous Conversation: An Empirical Study of Word Sense Disambiguation for Naturalistic Social Dialogs in American English</title>
</titleInfo>
<!-- Sole author of the paper (role term: author). -->
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Lưu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the paper, given as year and month. -->
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume, with its four editors, publisher and place. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Human Evaluation of NLP Systems (HumEval)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anya</namePart>
<namePart type="family">Belz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maja</namePart>
<namePart type="family">Popović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehud</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anastasia</namePart>
<namePart type="family">Shimorina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Current evaluation practices for social dialog systems, dedicated to human-computer spontaneous conversation, exclusively focus on the quality of system-generated surface text, but not human-verifiable aspects of mutual understanding between the systems and their interlocutors. This work proposes Word Sense Disambiguation (WSD) as an essential component of a valid and reliable human evaluation framework, whose long-term goal is to radically improve the usability of dialog systems in real-life human-computer collaboration. The practicality of this proposal is proved via experimentally investigating (1) the WordNet 3.0 sense inventory coverage of lexical meanings in spontaneous conversation between humans in American English, assumed as an upper bound of lexical diversity of human-computer communication, and (2) the effectiveness of state-of-the-art WSD models and pretrained transformer-based contextual embeddings on this type of data.</abstract>
<!-- Identifiers: the citation key repeats the record ID; the DOI is stored bare, without a resolver prefix. -->
<identifier type="citekey">luu-2022-towards</identifier>
<identifier type="doi">10.18653/v1/2022.humeval-1.10</identifier>
<location>
<url>https://aclanthology.org/2022.humeval-1.10</url>
</location>
<!-- Part details: date plus page extent (start 116, end 125). -->
<part>
<date>2022-05</date>
<extent unit="page">
<start>116</start>
<end>125</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Human Evaluation of Mutual Understanding in Human-Computer Spontaneous Conversation: An Empirical Study of Word Sense Disambiguation for Naturalistic Social Dialogs in American English
%A Lưu, Alex
%Y Belz, Anya
%Y Popović, Maja
%Y Reiter, Ehud
%Y Shimorina, Anastasia
%S Proceedings of the 2nd Workshop on Human Evaluation of NLP Systems (HumEval)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F luu-2022-towards
%X Current evaluation practices for social dialog systems, dedicated to human-computer spontaneous conversation, exclusively focus on the quality of system-generated surface text, but not human-verifiable aspects of mutual understanding between the systems and their interlocutors. This work proposes Word Sense Disambiguation (WSD) as an essential component of a valid and reliable human evaluation framework, whose long-term goal is to radically improve the usability of dialog systems in real-life human-computer collaboration. The practicality of this proposal is proved via experimentally investigating (1) the WordNet 3.0 sense inventory coverage of lexical meanings in spontaneous conversation between humans in American English, assumed as an upper bound of lexical diversity of human-computer communication, and (2) the effectiveness of state-of-the-art WSD models and pretrained transformer-based contextual embeddings on this type of data.
%R 10.18653/v1/2022.humeval-1.10
%U https://aclanthology.org/2022.humeval-1.10
%U https://doi.org/10.18653/v1/2022.humeval-1.10
%P 116-125
Markdown (Informal)
[Towards Human Evaluation of Mutual Understanding in Human-Computer Spontaneous Conversation: An Empirical Study of Word Sense Disambiguation for Naturalistic Social Dialogs in American English](https://aclanthology.org/2022.humeval-1.10) (Lưu, HumEval 2022)
ACL