@inproceedings{dusek-kasner-2020-evaluating,
title = "Evaluating Semantic Accuracy of Data-to-Text Generation with Natural Language Inference",
author = "Du{\v{s}}ek, Ond{\v{r}}ej and
Kasner, Zden{\v{e}}k",
booktitle = "Proceedings of the 13th International Conference on Natural Language Generation",
month = dec,
year = "2020",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.inlg-1.19",
pages = "131--137",
abstract = "A major challenge in evaluating data-to-text (D2T) generation is measuring the semantic accuracy of the generated text, i.e. checking if the output text contains all and only facts supported by the input data. We propose a new metric for evaluating the semantic accuracy of D2T generation based on a neural model pretrained for natural language inference (NLI). We use the NLI model to check textual entailment between the input data and the output text in both directions, allowing us to reveal omissions or hallucinations. Input data are converted to text for NLI using trivial templates. Our experiments on two recent D2T datasets show that our metric can achieve high accuracy in identifying erroneous system outputs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dusek-kasner-2020-evaluating">
<titleInfo>
<title>Evaluating Semantic Accuracy of Data-to-Text Generation with Natural Language Inference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Dušek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zdeněk</namePart>
<namePart type="family">Kasner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th International Conference on Natural Language Generation</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A major challenge in evaluating data-to-text (D2T) generation is measuring the semantic accuracy of the generated text, i.e. checking if the output text contains all and only facts supported by the input data. We propose a new metric for evaluating the semantic accuracy of D2T generation based on a neural model pretrained for natural language inference (NLI). We use the NLI model to check textual entailment between the input data and the output text in both directions, allowing us to reveal omissions or hallucinations. Input data are converted to text for NLI using trivial templates. Our experiments on two recent D2T datasets show that our metric can achieve high accuracy in identifying erroneous system outputs.</abstract>
<identifier type="citekey">dusek-kasner-2020-evaluating</identifier>
<location>
<url>https://aclanthology.org/2020.inlg-1.19</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>131</start>
<end>137</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Semantic Accuracy of Data-to-Text Generation with Natural Language Inference
%A Dušek, Ondřej
%A Kasner, Zdeněk
%S Proceedings of the 13th International Conference on Natural Language Generation
%D 2020
%8 December
%I Association for Computational Linguistics
%C Dublin, Ireland
%F dusek-kasner-2020-evaluating
%X A major challenge in evaluating data-to-text (D2T) generation is measuring the semantic accuracy of the generated text, i.e. checking if the output text contains all and only facts supported by the input data. We propose a new metric for evaluating the semantic accuracy of D2T generation based on a neural model pretrained for natural language inference (NLI). We use the NLI model to check textual entailment between the input data and the output text in both directions, allowing us to reveal omissions or hallucinations. Input data are converted to text for NLI using trivial templates. Our experiments on two recent D2T datasets show that our metric can achieve high accuracy in identifying erroneous system outputs.
%U https://aclanthology.org/2020.inlg-1.19
%P 131-137
Markdown (Informal)
[Evaluating Semantic Accuracy of Data-to-Text Generation with Natural Language Inference](https://aclanthology.org/2020.inlg-1.19) (Dušek & Kasner, INLG 2020)
ACL