@inproceedings{choubey-etal-2023-cape,
title = "{C}a{PE}: Contrastive Parameter Ensembling for Reducing Hallucination in Abstractive Summarization",
author = "Choubey, Prafulla Kumar and
Fabbri, Alex and
Vig, Jesse and
Wu, Chien-Sheng and
Liu, Wenhao and
Rajani, Nazneen",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.685",
doi = "10.18653/v1/2023.findings-acl.685",
pages = "10755--10773",
abstract = "Hallucination is a known issue for neural abstractive summarization models. Recent work suggests that the degree of hallucination may depend on factual errors in the training data. In this work, we propose a new method called Contrastive Parameter Ensembling (CaPE) to use training data more effectively, utilizing variations in noise in training samples to reduce hallucination. Starting with a base model fine-tuned on an entire dataset, we additionally train expert and anti-expert models on clean and noisy subsets of the data, respectively. We then adjust the parameters of the base model by adding (subtracting) the parameters of the expert (anti-expert), advancing the recent work on additive parameter ensembling approaches. Trained on a much smaller data subset, expert and anti-expert models only fractionally ({\textless}14{\%}) increases the total training time. Further, CaPE uses parameter ensembling and does not increase the inference time. Experimental results show that CaPE improves performance across different automatic factual metrics and human evaluation, with a maximum improvement of 16.69{\%} and 15.38{\%} on summary-level dependency-arc entailment accuracy for the XSUM and CNN/DM datasets. The CaPE model performs comparably to the base model on metrics of informativeness such as ROUGE.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="choubey-etal-2023-cape">
<titleInfo>
<title>CaPE: Contrastive Parameter Ensembling for Reducing Hallucination in Abstractive Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Prafulla</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Choubey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Fabbri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jesse</namePart>
<namePart type="family">Vig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chien-Sheng</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenhao</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nazneen</namePart>
<namePart type="family">Rajani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Hallucination is a known issue for neural abstractive summarization models. Recent work suggests that the degree of hallucination may depend on factual errors in the training data. In this work, we propose a new method called Contrastive Parameter Ensembling (CaPE) to use training data more effectively, utilizing variations in noise in training samples to reduce hallucination. Starting with a base model fine-tuned on an entire dataset, we additionally train expert and anti-expert models on clean and noisy subsets of the data, respectively. We then adjust the parameters of the base model by adding (subtracting) the parameters of the expert (anti-expert), advancing the recent work on additive parameter ensembling approaches. Trained on a much smaller data subset, expert and anti-expert models only fractionally (\textless14%) increases the total training time. Further, CaPE uses parameter ensembling and does not increase the inference time. Experimental results show that CaPE improves performance across different automatic factual metrics and human evaluation, with a maximum improvement of 16.69% and 15.38% on summary-level dependency-arc entailment accuracy for the XSUM and CNN/DM datasets. The CaPE model performs comparably to the base model on metrics of informativeness such as ROUGE.</abstract>
<identifier type="citekey">choubey-etal-2023-cape</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.685</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.685</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>10755</start>
<end>10773</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CaPE: Contrastive Parameter Ensembling for Reducing Hallucination in Abstractive Summarization
%A Choubey, Prafulla Kumar
%A Fabbri, Alex
%A Vig, Jesse
%A Wu, Chien-Sheng
%A Liu, Wenhao
%A Rajani, Nazneen
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F choubey-etal-2023-cape
%X Hallucination is a known issue for neural abstractive summarization models. Recent work suggests that the degree of hallucination may depend on factual errors in the training data. In this work, we propose a new method called Contrastive Parameter Ensembling (CaPE) to use training data more effectively, utilizing variations in noise in training samples to reduce hallucination. Starting with a base model fine-tuned on an entire dataset, we additionally train expert and anti-expert models on clean and noisy subsets of the data, respectively. We then adjust the parameters of the base model by adding (subtracting) the parameters of the expert (anti-expert), advancing the recent work on additive parameter ensembling approaches. Trained on a much smaller data subset, expert and anti-expert models only fractionally (\textless14%) increases the total training time. Further, CaPE uses parameter ensembling and does not increase the inference time. Experimental results show that CaPE improves performance across different automatic factual metrics and human evaluation, with a maximum improvement of 16.69% and 15.38% on summary-level dependency-arc entailment accuracy for the XSUM and CNN/DM datasets. The CaPE model performs comparably to the base model on metrics of informativeness such as ROUGE.
%R 10.18653/v1/2023.findings-acl.685
%U https://aclanthology.org/2023.findings-acl.685
%U https://doi.org/10.18653/v1/2023.findings-acl.685
%P 10755-10773
Markdown (Informal)
[CaPE: Contrastive Parameter Ensembling for Reducing Hallucination in Abstractive Summarization](https://aclanthology.org/2023.findings-acl.685) (Choubey et al., Findings 2023)
ACL