@inproceedings{luo-etal-2024-intractability,
title = "On the Intractability to Synthesize Factual Inconsistencies in Summarization",
author = "Luo, Ge and
Fan, Weisi and
Li, Miaoran and
He, Youbiao and
Yang, Yinfei and
Bao, Forrest",
editor = "Graham, Yvette and
Purver, Matthew",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2024",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-eacl.69",
pages = "1026--1037",
abstract = "Factual consistency detection has gotten raised attention in the task of abstractive summarization. Many existing works rely on synthetic training data, which may not accurately reflect or match the inconsistencies produced by summarization models. In this paper, we first systematically analyze the shortcomings of the current methods in synthesizing inconsistent summaries. Current synthesis methods may fail to produce inconsistencies of coreference errors and discourse errors, per our quantitative and qualitative study. Then, employing the parameter-efficient finetuning (PEFT) technique, we discover that a competitive factual consistency detector can be achieved using thousands of real model-generated summaries with human annotations. Our study demonstrates the importance of real machine-generated texts with human annotation in NLG evaluation as our model outperforms the SOTA on the CoGenSumm, FactCC, Frank, and SummEval datasets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="luo-etal-2024-intractability">
<titleInfo>
<title>On the Intractability to Synthesize Factual Inconsistencies in Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ge</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weisi</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miaoran</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Youbiao</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yinfei</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Forrest</namePart>
<namePart type="family">Bao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yvette</namePart>
<namePart type="family">Graham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Purver</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Factual consistency detection has gotten raised attention in the task of abstractive summarization. Many existing works rely on synthetic training data, which may not accurately reflect or match the inconsistencies produced by summarization models. In this paper, we first systematically analyze the shortcomings of the current methods in synthesizing inconsistent summaries. Current synthesis methods may fail to produce inconsistencies of coreference errors and discourse errors, per our quantitative and qualitative study. Then, employing the parameter-efficient finetuning (PEFT) technique, we discover that a competitive factual consistency detector can be achieved using thousands of real model-generated summaries with human annotations. Our study demonstrates the importance of real machine-generated texts with human annotation in NLG evaluation as our model outperforms the SOTA on the CoGenSumm, FactCC, Frank, and SummEval datasets.</abstract>
<identifier type="citekey">luo-etal-2024-intractability</identifier>
<location>
<url>https://aclanthology.org/2024.findings-eacl.69</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>1026</start>
<end>1037</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On the Intractability to Synthesize Factual Inconsistencies in Summarization
%A Luo, Ge
%A Fan, Weisi
%A Li, Miaoran
%A He, Youbiao
%A Yang, Yinfei
%A Bao, Forrest
%Y Graham, Yvette
%Y Purver, Matthew
%S Findings of the Association for Computational Linguistics: EACL 2024
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F luo-etal-2024-intractability
%X Factual consistency detection has gotten raised attention in the task of abstractive summarization. Many existing works rely on synthetic training data, which may not accurately reflect or match the inconsistencies produced by summarization models. In this paper, we first systematically analyze the shortcomings of the current methods in synthesizing inconsistent summaries. Current synthesis methods may fail to produce inconsistencies of coreference errors and discourse errors, per our quantitative and qualitative study. Then, employing the parameter-efficient finetuning (PEFT) technique, we discover that a competitive factual consistency detector can be achieved using thousands of real model-generated summaries with human annotations. Our study demonstrates the importance of real machine-generated texts with human annotation in NLG evaluation as our model outperforms the SOTA on the CoGenSumm, FactCC, Frank, and SummEval datasets.
%U https://aclanthology.org/2024.findings-eacl.69
%P 1026-1037
Markdown (Informal)
[On the Intractability to Synthesize Factual Inconsistencies in Summarization](https://aclanthology.org/2024.findings-eacl.69) (Luo et al., Findings 2024)
ACL