BibTeX
@inproceedings{ghazarian-etal-2022-wrong,
    title = "What is wrong with you?: Leveraging User Sentiment for Automatic Dialog Evaluation",
    author = "Ghazarian, Sarik and
      Hedayatnia, Behnam and
      Papangelis, Alexandros and
      Liu, Yang and
      Hakkani-Tur, Dilek",
    editor = "Muresan, Smaranda and
      Nakov, Preslav and
      Villavicencio, Aline",
    booktitle = "Findings of the Association for Computational Linguistics: ACL 2022",
    month = may,
    year = "2022",
    address = "Dublin, Ireland",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.findings-acl.331",
    doi = "10.18653/v1/2022.findings-acl.331",
    pages = "4194--4204",
    abstract = "Accurate automatic evaluation metrics for open-domain dialogs are in high demand. Existing model-based metrics for system response evaluation are trained on human annotated data, which is cumbersome to collect. In this work, we propose to use information that can be automatically extracted from the next user utterance, such as its sentiment or whether the user explicitly ends the conversation, as a proxy to measure the quality of the previous system response. This allows us to train on a massive set of dialogs with weak supervision, without requiring manual system turn quality annotations. Experiments show that our model is comparable to models trained on human annotated data. Furthermore, our model generalizes across both spoken and written open-domain dialog corpora collected from real and paid users.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ghazarian-etal-2022-wrong">
    <titleInfo>
      <title>What is wrong with you?: Leveraging User Sentiment for Automatic Dialog Evaluation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Sarik</namePart>
      <namePart type="family">Ghazarian</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Behnam</namePart>
      <namePart type="family">Hedayatnia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alexandros</namePart>
      <namePart type="family">Papangelis</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yang</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dilek</namePart>
      <namePart type="family">Hakkani-Tur</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2022</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Smaranda</namePart>
        <namePart type="family">Muresan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Preslav</namePart>
        <namePart type="family">Nakov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aline</namePart>
        <namePart type="family">Villavicencio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dublin, Ireland</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Accurate automatic evaluation metrics for open-domain dialogs are in high demand. Existing model-based metrics for system response evaluation are trained on human annotated data, which is cumbersome to collect. In this work, we propose to use information that can be automatically extracted from the next user utterance, such as its sentiment or whether the user explicitly ends the conversation, as a proxy to measure the quality of the previous system response. This allows us to train on a massive set of dialogs with weak supervision, without requiring manual system turn quality annotations. Experiments show that our model is comparable to models trained on human annotated data. Furthermore, our model generalizes across both spoken and written open-domain dialog corpora collected from real and paid users.</abstract>
    <identifier type="citekey">ghazarian-etal-2022-wrong</identifier>
    <identifier type="doi">10.18653/v1/2022.findings-acl.331</identifier>
    <location>
      <url>https://aclanthology.org/2022.findings-acl.331</url>
    </location>
    <part>
      <date>2022-05</date>
      <extent unit="page">
        <start>4194</start>
        <end>4204</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T What is wrong with you?: Leveraging User Sentiment for Automatic Dialog Evaluation
%A Ghazarian, Sarik
%A Hedayatnia, Behnam
%A Papangelis, Alexandros
%A Liu, Yang
%A Hakkani-Tur, Dilek
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Findings of the Association for Computational Linguistics: ACL 2022
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F ghazarian-etal-2022-wrong
%X Accurate automatic evaluation metrics for open-domain dialogs are in high demand. Existing model-based metrics for system response evaluation are trained on human annotated data, which is cumbersome to collect. In this work, we propose to use information that can be automatically extracted from the next user utterance, such as its sentiment or whether the user explicitly ends the conversation, as a proxy to measure the quality of the previous system response. This allows us to train on a massive set of dialogs with weak supervision, without requiring manual system turn quality annotations. Experiments show that our model is comparable to models trained on human annotated data. Furthermore, our model generalizes across both spoken and written open-domain dialog corpora collected from real and paid users.
%R 10.18653/v1/2022.findings-acl.331
%U https://aclanthology.org/2022.findings-acl.331
%U https://doi.org/10.18653/v1/2022.findings-acl.331
%P 4194-4204
Markdown (Informal)
[What is wrong with you?: Leveraging User Sentiment for Automatic Dialog Evaluation](https://aclanthology.org/2022.findings-acl.331) (Ghazarian et al., Findings 2022)
ACL
Sarik Ghazarian, Behnam Hedayatnia, Alexandros Papangelis, Yang Liu, and Dilek Hakkani-Tur. 2022. What is wrong with you?: Leveraging User Sentiment for Automatic Dialog Evaluation. In Findings of the Association for Computational Linguistics: ACL 2022, pages 4194–4204, Dublin, Ireland. Association for Computational Linguistics.