@inproceedings{corbelle-etal-2020-proof,
title = "A proof of concept on triangular test evaluation for Natural Language Generation",
author = "Corbelle, Javier Gonz{\'a}lez and
Moral, Jos{\'e} Mar{\'\i}a Alonso and
Diz, Alberto Bugar{\'\i}n",
editor = "Agarwal, Shubham and
Du{\v{s}}ek, Ond{\v{r}}ej and
Gehrmann, Sebastian and
Gkatzia, Dimitra and
Konstas, Ioannis and
Van Miltenburg, Emiel and
Santhanam, Sashank",
booktitle = "Proceedings of the 1st Workshop on Evaluating NLG Evaluation",
month = dec,
year = "2020",
address = "Online (Dublin, Ireland)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.evalnlgeval-1.1",
pages = "1--9",
abstract = "The evaluation of Natural Language Generation (NLG) systems has recently aroused much interest in the research community, since it should address several challenging aspects, such as readability of the generated texts, adequacy to the user within a particular context and moment and linguistic quality-related issues (e.g., correctness, coherence, understandability), among others. In this paper, we propose a novel technique for evaluating NLG systems that is inspired on the triangular test used in the field of sensory analysis. This technique allows us to compare two texts generated by different subjects and to i) determine whether statistically significant differences are detected between them when evaluated by humans and ii) quantify to what extent the number of evaluators plays an important role in the sensitivity of the results. As a proof of concept, we apply this evaluation technique in a real use case in the field of meteorology, showing the advantages and disadvantages of our proposal.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="corbelle-etal-2020-proof">
<titleInfo>
<title>A proof of concept on triangular test evaluation for Natural Language Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Javier</namePart>
<namePart type="given">González</namePart>
<namePart type="family">Corbelle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">José</namePart>
<namePart type="given">María</namePart>
<namePart type="given">Alonso</namePart>
<namePart type="family">Moral</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="given">Bugarín</namePart>
<namePart type="family">Diz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Evaluating NLG Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shubham</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Dušek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Gehrmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dimitra</namePart>
<namePart type="family">Gkatzia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ioannis</namePart>
<namePart type="family">Konstas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emiel</namePart>
<namePart type="family">Van Miltenburg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sashank</namePart>
<namePart type="family">Santhanam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online (Dublin, Ireland)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The evaluation of Natural Language Generation (NLG) systems has recently aroused much interest in the research community, since it should address several challenging aspects, such as readability of the generated texts, adequacy to the user within a particular context and moment and linguistic quality-related issues (e.g., correctness, coherence, understandability), among others. In this paper, we propose a novel technique for evaluating NLG systems that is inspired on the triangular test used in the field of sensory analysis. This technique allows us to compare two texts generated by different subjects and to i) determine whether statistically significant differences are detected between them when evaluated by humans and ii) quantify to what extent the number of evaluators plays an important role in the sensitivity of the results. As a proof of concept, we apply this evaluation technique in a real use case in the field of meteorology, showing the advantages and disadvantages of our proposal.</abstract>
<identifier type="citekey">corbelle-etal-2020-proof</identifier>
<location>
<url>https://aclanthology.org/2020.evalnlgeval-1.1</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>1</start>
<end>9</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A proof of concept on triangular test evaluation for Natural Language Generation
%A Corbelle, Javier González
%A Moral, José María Alonso
%A Diz, Alberto Bugarín
%Y Agarwal, Shubham
%Y Dušek, Ondřej
%Y Gehrmann, Sebastian
%Y Gkatzia, Dimitra
%Y Konstas, Ioannis
%Y Van Miltenburg, Emiel
%Y Santhanam, Sashank
%S Proceedings of the 1st Workshop on Evaluating NLG Evaluation
%D 2020
%8 December
%I Association for Computational Linguistics
%C Online (Dublin, Ireland)
%F corbelle-etal-2020-proof
%X The evaluation of Natural Language Generation (NLG) systems has recently aroused much interest in the research community, since it should address several challenging aspects, such as readability of the generated texts, adequacy to the user within a particular context and moment and linguistic quality-related issues (e.g., correctness, coherence, understandability), among others. In this paper, we propose a novel technique for evaluating NLG systems that is inspired on the triangular test used in the field of sensory analysis. This technique allows us to compare two texts generated by different subjects and to i) determine whether statistically significant differences are detected between them when evaluated by humans and ii) quantify to what extent the number of evaluators plays an important role in the sensitivity of the results. As a proof of concept, we apply this evaluation technique in a real use case in the field of meteorology, showing the advantages and disadvantages of our proposal.
%U https://aclanthology.org/2020.evalnlgeval-1.1
%P 1-9
Markdown (Informal)
[A proof of concept on triangular test evaluation for Natural Language Generation](https://aclanthology.org/2020.evalnlgeval-1.1) (Corbelle et al., EvalNLGEval 2020)
ACL