@inproceedings{sedoc-etal-2019-chateval,
title = "{C}hat{E}val: A Tool for Chatbot Evaluation",
author = "Sedoc, Jo{\~a}o and
Ippolito, Daphne and
Kirubarajan, Arun and
Thirani, Jai and
Ungar, Lyle and
Callison-Burch, Chris",
editor = "Ammar, Waleed and
Louis, Annie and
Mostafazadeh, Nasrin",
booktitle = "Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics (Demonstrations)",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/N19-4011",
doi = "10.18653/v1/N19-4011",
pages = "60--65",
abstract = "Open-domain dialog systems (i.e. chatbots) are difficult to evaluate. The current best practice for analyzing and comparing these dialog systems is the use of human judgments. However, the lack of standardization in evaluation procedures, and the fact that model parameters and code are rarely published hinder systematic human evaluation experiments. We introduce a unified framework for human evaluation of chatbots that augments existing tools and provides a web-based hub for researchers to share and compare their dialog systems. Researchers can submit their trained models to the ChatEval web interface and obtain comparisons with baselines and prior work. The evaluation code is open-source to ensure standardization and transparency. In addition, we introduce open-source baseline models and evaluation datasets. ChatEval can be found at \url{https://chateval.org}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sedoc-etal-2019-chateval">
<titleInfo>
<title>ChatEval: A Tool for Chatbot Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">João</namePart>
<namePart type="family">Sedoc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daphne</namePart>
<namePart type="family">Ippolito</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arun</namePart>
<namePart type="family">Kirubarajan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jai</namePart>
<namePart type="family">Thirani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lyle</namePart>
<namePart type="family">Ungar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Callison-Burch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics (Demonstrations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Waleed</namePart>
<namePart type="family">Ammar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annie</namePart>
<namePart type="family">Louis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nasrin</namePart>
<namePart type="family">Mostafazadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, Minnesota</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Open-domain dialog systems (i.e. chatbots) are difficult to evaluate. The current best practice for analyzing and comparing these dialog systems is the use of human judgments. However, the lack of standardization in evaluation procedures, and the fact that model parameters and code are rarely published hinder systematic human evaluation experiments. We introduce a unified framework for human evaluation of chatbots that augments existing tools and provides a web-based hub for researchers to share and compare their dialog systems. Researchers can submit their trained models to the ChatEval web interface and obtain comparisons with baselines and prior work. The evaluation code is open-source to ensure standardization and transparency. In addition, we introduce open-source baseline models and evaluation datasets. ChatEval can be found at https://chateval.org.</abstract>
<identifier type="citekey">sedoc-etal-2019-chateval</identifier>
<identifier type="doi">10.18653/v1/N19-4011</identifier>
<location>
<url>https://aclanthology.org/N19-4011</url>
</location>
<part>
<date>2019-06</date>
<extent unit="page">
<start>60</start>
<end>65</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ChatEval: A Tool for Chatbot Evaluation
%A Sedoc, João
%A Ippolito, Daphne
%A Kirubarajan, Arun
%A Thirani, Jai
%A Ungar, Lyle
%A Callison-Burch, Chris
%Y Ammar, Waleed
%Y Louis, Annie
%Y Mostafazadeh, Nasrin
%S Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics (Demonstrations)
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F sedoc-etal-2019-chateval
%X Open-domain dialog systems (i.e. chatbots) are difficult to evaluate. The current best practice for analyzing and comparing these dialog systems is the use of human judgments. However, the lack of standardization in evaluation procedures, and the fact that model parameters and code are rarely published hinder systematic human evaluation experiments. We introduce a unified framework for human evaluation of chatbots that augments existing tools and provides a web-based hub for researchers to share and compare their dialog systems. Researchers can submit their trained models to the ChatEval web interface and obtain comparisons with baselines and prior work. The evaluation code is open-source to ensure standardization and transparency. In addition, we introduce open-source baseline models and evaluation datasets. ChatEval can be found at https://chateval.org.
%R 10.18653/v1/N19-4011
%U https://aclanthology.org/N19-4011
%U https://doi.org/10.18653/v1/N19-4011
%P 60-65
Markdown (Informal)
[ChatEval: A Tool for Chatbot Evaluation](https://aclanthology.org/N19-4011) (Sedoc et al., NAACL 2019)
ACL
- João Sedoc, Daphne Ippolito, Arun Kirubarajan, Jai Thirani, Lyle Ungar, and Chris Callison-Burch. 2019. ChatEval: A Tool for Chatbot Evaluation. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics (Demonstrations), pages 60–65, Minneapolis, Minnesota. Association for Computational Linguistics.