@inproceedings{ge-etal-2023-detection,
title = "Detection and Mitigation of the Negative Impact of Dataset Extractivity on Abstractive Summarization",
author = "Ge, Yubin and
Jeoung, Sullam and
Dinh, Ly and
Diesner, Jana",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.877",
doi = "10.18653/v1/2023.findings-acl.877",
pages = "13963--13976",
abstract = "In text summarization, extractivity is defined as a measurement of the degree of overlap between a source document and its summary. Previous research has shown that the extractivity level of training data can influence both output extractivity and the amount of factual information (i.e. faithfulness) in outputs for abstractive summarization. However, it remains unclear if and how extractivity impacts the performance of abstractive models. In this work, we investigate the relationship between dataset extractivity and model performance by comparing the performance of trained models under different degrees of extractivity. We find that while low levels of extractivity can improve performance, as extractivity increases, performance is negatively impacted. Furthermore, through an analysis of the model{'}s copy continuity of content, we discover that higher extractivity leads to a greater tendency for the model to copy text continuously from the source document rather than identifying and summarizing important content that should be covered in the target summary. To address these issues, we propose a simple and effective method to design copy labels for fixing the model{'}s copying behaviors and train the model with a copy mechanism. The experimental results illustrate the effectiveness of our strategy in alleviating the negative impact on model performance resulting from high dataset extractivity, and that our method outperforms several competitive baselines.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ge-etal-2023-detection">
<titleInfo>
<title>Detection and Mitigation of the Negative Impact of Dataset Extractivity on Abstractive Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yubin</namePart>
<namePart type="family">Ge</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sullam</namePart>
<namePart type="family">Jeoung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ly</namePart>
<namePart type="family">Dinh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jana</namePart>
<namePart type="family">Diesner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In text summarization, extractivity is defined as a measurement of the degree of overlap between a source document and its summary. Previous research has shown that the extractivity level of training data can influence both output extractivity and the amount of factual information (i.e. faithfulness) in outputs for abstractive summarization. However, it remains unclear if and how extractivity impacts the performance of abstractive models. In this work, we investigate the relationship between dataset extractivity and model performance by comparing the performance of trained models under different degrees of extractivity. We find that while low levels of extractivity can improve performance, as extractivity increases, performance is negatively impacted. Furthermore, through an analysis of the model’s copy continuity of content, we discover that higher extractivity leads to a greater tendency for the model to copy text continuously from the source document rather than identifying and summarizing important content that should be covered in the target summary. To address these issues, we propose a simple and effective method to design copy labels for fixing the model’s copying behaviors and train the model with a copy mechanism. The experimental results illustrate the effectiveness of our strategy in alleviating the negative impact on model performance resulting from high dataset extractivity, and that our method outperforms several competitive baselines.</abstract>
<identifier type="citekey">ge-etal-2023-detection</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.877</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.877</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>13963</start>
<end>13976</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detection and Mitigation of the Negative Impact of Dataset Extractivity on Abstractive Summarization
%A Ge, Yubin
%A Jeoung, Sullam
%A Dinh, Ly
%A Diesner, Jana
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F ge-etal-2023-detection
%X In text summarization, extractivity is defined as a measurement of the degree of overlap between a source document and its summary. Previous research has shown that the extractivity level of training data can influence both output extractivity and the amount of factual information (i.e. faithfulness) in outputs for abstractive summarization. However, it remains unclear if and how extractivity impacts the performance of abstractive models. In this work, we investigate the relationship between dataset extractivity and model performance by comparing the performance of trained models under different degrees of extractivity. We find that while low levels of extractivity can improve performance, as extractivity increases, performance is negatively impacted. Furthermore, through an analysis of the model’s copy continuity of content, we discover that higher extractivity leads to a greater tendency for the model to copy text continuously from the source document rather than identifying and summarizing important content that should be covered in the target summary. To address these issues, we propose a simple and effective method to design copy labels for fixing the model’s copying behaviors and train the model with a copy mechanism. The experimental results illustrate the effectiveness of our strategy in alleviating the negative impact on model performance resulting from high dataset extractivity, and that our method outperforms several competitive baselines.
%R 10.18653/v1/2023.findings-acl.877
%U https://aclanthology.org/2023.findings-acl.877
%U https://doi.org/10.18653/v1/2023.findings-acl.877
%P 13963-13976
Markdown (Informal)
[Detection and Mitigation of the Negative Impact of Dataset Extractivity on Abstractive Summarization](https://aclanthology.org/2023.findings-acl.877) (Ge et al., Findings 2023)
ACL