@inproceedings{lux-etal-2020-truth,
title = "Truth or Error? Towards systematic analysis of factual errors in abstractive summaries",
author = "Lux, Klaus-Michael and
Sappelli, Maya and
Larson, Martha",
booktitle = "Proceedings of the First Workshop on Evaluation and Comparison of NLP Systems",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.eval4nlp-1.1",
doi = "10.18653/v1/2020.eval4nlp-1.1",
pages = "1--10",
abstract = "This paper presents a typology of errors produced by automatic summarization systems. The typology was created by manually analyzing the output of four recent neural summarization systems. Our work is motivated by the growing awareness of the need for better summary evaluation methods that go beyond conventional overlap-based metrics. Our typology is structured into two dimensions. First, the Mapping Dimension describes surface-level errors and provides insight into word-sequence transformation issues. Second, the Meaning Dimension describes issues related to interpretation and provides insight into breakdowns in truth, i.e., factual faithfulness to the original text. Comparative analysis revealed that two neural summarization systems leveraging pre-trained models have an advantage in decreasing grammaticality errors, but not necessarily factual errors. We also discuss the importance of ensuring that summary length and abstractiveness do not interfere with evaluating summary quality.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="lux-etal-2020-truth">
    <titleInfo>
      <title>Truth or Error? Towards systematic analysis of factual errors in abstractive summaries</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Klaus-Michael</namePart>
      <namePart type="family">Lux</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Maya</namePart>
      <namePart type="family">Sappelli</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Martha</namePart>
      <namePart type="family">Larson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First Workshop on Evaluation and Comparison of NLP Systems</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper presents a typology of errors produced by automatic summarization systems. The typology was created by manually analyzing the output of four recent neural summarization systems. Our work is motivated by the growing awareness of the need for better summary evaluation methods that go beyond conventional overlap-based metrics. Our typology is structured into two dimensions. First, the Mapping Dimension describes surface-level errors and provides insight into word-sequence transformation issues. Second, the Meaning Dimension describes issues related to interpretation and provides insight into breakdowns in truth, i.e., factual faithfulness to the original text. Comparative analysis revealed that two neural summarization systems leveraging pre-trained models have an advantage in decreasing grammaticality errors, but not necessarily factual errors. We also discuss the importance of ensuring that summary length and abstractiveness do not interfere with evaluating summary quality.</abstract>
    <identifier type="citekey">lux-etal-2020-truth</identifier>
    <identifier type="doi">10.18653/v1/2020.eval4nlp-1.1</identifier>
    <location>
      <url>https://aclanthology.org/2020.eval4nlp-1.1</url>
    </location>
    <part>
      <date>2020-11</date>
      <extent unit="page">
        <start>1</start>
        <end>10</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Truth or Error? Towards systematic analysis of factual errors in abstractive summaries
%A Lux, Klaus-Michael
%A Sappelli, Maya
%A Larson, Martha
%S Proceedings of the First Workshop on Evaluation and Comparison of NLP Systems
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F lux-etal-2020-truth
%X This paper presents a typology of errors produced by automatic summarization systems. The typology was created by manually analyzing the output of four recent neural summarization systems. Our work is motivated by the growing awareness of the need for better summary evaluation methods that go beyond conventional overlap-based metrics. Our typology is structured into two dimensions. First, the Mapping Dimension describes surface-level errors and provides insight into word-sequence transformation issues. Second, the Meaning Dimension describes issues related to interpretation and provides insight into breakdowns in truth, i.e., factual faithfulness to the original text. Comparative analysis revealed that two neural summarization systems leveraging pre-trained models have an advantage in decreasing grammaticality errors, but not necessarily factual errors. We also discuss the importance of ensuring that summary length and abstractiveness do not interfere with evaluating summary quality.
%R 10.18653/v1/2020.eval4nlp-1.1
%U https://aclanthology.org/2020.eval4nlp-1.1
%U https://doi.org/10.18653/v1/2020.eval4nlp-1.1
%P 1-10
Markdown (Informal)
[Truth or Error? Towards systematic analysis of factual errors in abstractive summaries](https://aclanthology.org/2020.eval4nlp-1.1) (Lux et al., Eval4NLP 2020)
ACL
Klaus-Michael Lux, Maya Sappelli, and Martha Larson. 2020. [Truth or Error? Towards systematic analysis of factual errors in abstractive summaries](https://aclanthology.org/2020.eval4nlp-1.1). In *Proceedings of the First Workshop on Evaluation and Comparison of NLP Systems*, pages 1–10, Online. Association for Computational Linguistics.