@inproceedings{cao-etal-2023-multi,
title = "Multi-View Source Ablation for Faithful Summarization",
author = "Cao, Shuyang and
Ma, Liang and
Lu, Di and
Logan IV, Robert L and
Tetreault, Joel and
Jaimes, Alejandro",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2023",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-eacl.151",
doi = "10.18653/v1/2023.findings-eacl.151",
pages = "2029--2047",
abstract = "In this paper, we present MuFaSSa (Multi-view Faithfulness Scoring via Source Ablation), a metric for evaluating faithfulness of abstractive summaries, and for guiding training of more faithful summarizers. For evaluation, MuFaSSa employs different strategies (e.g., masking entity mentions) to first remove information from the source document to form multiple ablated views. Then, the faithfulness level of each token in a generated summary is measured by the difference between the token generation probabilities when given the original document and the ablated document as inputs to trained summarizers. For training, MuFaSSa uses a novel word truncation objective that drops unfaithful tokens located by MuFaSSa in both the decoder input and output. Alignments with human-annotated faithfulness labels on AggreFact show that MuFaSSa is comparable to or better than existing metrics built on classifiers or QA models pre-trained on other tasks. In experiments on summarization with XSum and CNN/DailyMail, models trained with word truncation using MuFaSSa outperform competitive methods according to both automatic faithfulness metrics and human assessments.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cao-etal-2023-multi">
<titleInfo>
<title>Multi-View Source Ablation for Faithful Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shuyang</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Di</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Logan IV</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Tetreault</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alejandro</namePart>
<namePart type="family">Jaimes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2023</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present MuFaSSa (Multi-view Faithfulness Scoring via Source Ablation), a metric for evaluating faithfulness of abstractive summaries, and for guiding training of more faithful summarizers. For evaluation, MuFaSSa employs different strategies (e.g., masking entity mentions) to first remove information from the source document to form multiple ablated views. Then, the faithfulness level of each token in a generated summary is measured by the difference between the token generation probabilities when given the original document and the ablated document as inputs to trained summarizers. For training, MuFaSSa uses a novel word truncation objective that drops unfaithful tokens located by MuFaSSa in both the decoder input and output. Alignments with human-annotated faithfulness labels on AggreFact show that MuFaSSa is comparable to or better than existing metrics built on classifiers or QA models pre-trained on other tasks. In experiments on summarization with XSum and CNN/DailyMail, models trained with word truncation using MuFaSSa outperform competitive methods according to both automatic faithfulness metrics and human assessments.</abstract>
<identifier type="citekey">cao-etal-2023-multi</identifier>
<identifier type="doi">10.18653/v1/2023.findings-eacl.151</identifier>
<location>
<url>https://aclanthology.org/2023.findings-eacl.151</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>2029</start>
<end>2047</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-View Source Ablation for Faithful Summarization
%A Cao, Shuyang
%A Ma, Liang
%A Lu, Di
%A Logan IV, Robert L.
%A Tetreault, Joel
%A Jaimes, Alejandro
%S Findings of the Association for Computational Linguistics: EACL 2023
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F cao-etal-2023-multi
%X In this paper, we present MuFaSSa (Multi-view Faithfulness Scoring via Source Ablation), a metric for evaluating faithfulness of abstractive summaries, and for guiding training of more faithful summarizers. For evaluation, MuFaSSa employs different strategies (e.g., masking entity mentions) to first remove information from the source document to form multiple ablated views. Then, the faithfulness level of each token in a generated summary is measured by the difference between the token generation probabilities when given the original document and the ablated document as inputs to trained summarizers. For training, MuFaSSa uses a novel word truncation objective that drops unfaithful tokens located by MuFaSSa in both the decoder input and output. Alignments with human-annotated faithfulness labels on AggreFact show that MuFaSSa is comparable to or better than existing metrics built on classifiers or QA models pre-trained on other tasks. In experiments on summarization with XSum and CNN/DailyMail, models trained with word truncation using MuFaSSa outperform competitive methods according to both automatic faithfulness metrics and human assessments.
%R 10.18653/v1/2023.findings-eacl.151
%U https://aclanthology.org/2023.findings-eacl.151
%U https://doi.org/10.18653/v1/2023.findings-eacl.151
%P 2029-2047
Markdown (Informal)
[Multi-View Source Ablation for Faithful Summarization](https://aclanthology.org/2023.findings-eacl.151) (Cao et al., Findings 2023)
ACL
- Shuyang Cao, Liang Ma, Di Lu, Robert L Logan IV, Joel Tetreault, and Alejandro Jaimes. 2023. Multi-View Source Ablation for Faithful Summarization. In Findings of the Association for Computational Linguistics: EACL 2023, pages 2029–2047, Dubrovnik, Croatia. Association for Computational Linguistics.