@inproceedings{liu-etal-2025-sara,
title = "{SARA}: Salience-Aware Reinforced Adaptive Decoding for Large Language Models in Abstractive Summarization",
author = "Liu, Nayu and
Zhu, Junnan and
Ma, Yiming and
Lu, Zhicong and
Xu, Wenlei and
Yang, Yong and
Zhong, Jiang and
Wei, Kaiwen",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.1236/",
doi = "10.18653/v1/2025.acl-long.1236",
pages = "25450--25463",
ISBN = "979-8-89176-251-0",
abstract = "LLMs have improved the fluency and informativeness of abstractive summarization but remain prone to hallucinations, where generated content deviates from the source document. Recent PMI decoding strategies mitigate over-reliance on prior knowledge by comparing output probabilities with and without source documents, effectively enhancing contextual utilization and improving faithfulness. However, existing strategies often neglect the explicit use of salient contextual information and rely on static hyperparameters to fix the balance between contextual and prior knowledge, limiting their flexibility. In this work, we propose Salience-Aware Reinforced Adaptive decoding (SARA), which incorporates salient information and allows the model to adaptively determine reliance on the source document{'}s context, salient context, and the model{'}s prior knowledge based on pointwise mutual information. Moreover, a tokenwise adaptive decoding mechanism via reinforcement learning is proposed in SARA to dynamically adjust the contributions of context and prior knowledge at each decoding timestep. Experiments on CNN/DM, WikiHow, and NYT50 datasets show that SARA consistently improves the quality and faithfulness of summaries across various LLM backbones without modifying their weights."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2025-sara">
<titleInfo>
<title>SARA: Salience-Aware Reinforced Adaptive Decoding for Large Language Models in Abstractive Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nayu</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junnan</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yiming</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhicong</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenlei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yong</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiang</namePart>
<namePart type="family">Zhong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kaiwen</namePart>
<namePart type="family">Wei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>LLMs have improved the fluency and informativeness of abstractive summarization but remain prone to hallucinations, where generated content deviates from the source document. Recent PMI decoding strategies mitigate over-reliance on prior knowledge by comparing output probabilities with and without source documents, effectively enhancing contextual utilization and improving faithfulness. However, existing strategies often neglect the explicit use of salient contextual information and rely on static hyperparameters to fix the balance between contextual and prior knowledge, limiting their flexibility. In this work, we propose Salience-Aware Reinforced Adaptive decoding (SARA), which incorporates salient information and allows the model to adaptively determine reliance on the source document’s context, salient context, and the model’s prior knowledge based on pointwise mutual information. Moreover, a tokenwise adaptive decoding mechanism via reinforcement learning is proposed in SARA to dynamically adjust the contributions of context and prior knowledge at each decoding timestep. Experiments on CNN/DM, WikiHow, and NYT50 datasets show that SARA consistently improves the quality and faithfulness of summaries across various LLM backbones without modifying their weights.</abstract>
<identifier type="citekey">liu-etal-2025-sara</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.1236</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.1236/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>25450</start>
<end>25463</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SARA: Salience-Aware Reinforced Adaptive Decoding for Large Language Models in Abstractive Summarization
%A Liu, Nayu
%A Zhu, Junnan
%A Ma, Yiming
%A Lu, Zhicong
%A Xu, Wenlei
%A Yang, Yong
%A Zhong, Jiang
%A Wei, Kaiwen
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F liu-etal-2025-sara
%X LLMs have improved the fluency and informativeness of abstractive summarization but remain prone to hallucinations, where generated content deviates from the source document. Recent PMI decoding strategies mitigate over-reliance on prior knowledge by comparing output probabilities with and without source documents, effectively enhancing contextual utilization and improving faithfulness. However, existing strategies often neglect the explicit use of salient contextual information and rely on static hyperparameters to fix the balance between contextual and prior knowledge, limiting their flexibility. In this work, we propose Salience-Aware Reinforced Adaptive decoding (SARA), which incorporates salient information and allows the model to adaptively determine reliance on the source document’s context, salient context, and the model’s prior knowledge based on pointwise mutual information. Moreover, a tokenwise adaptive decoding mechanism via reinforcement learning is proposed in SARA to dynamically adjust the contributions of context and prior knowledge at each decoding timestep. Experiments on CNN/DM, WikiHow, and NYT50 datasets show that SARA consistently improves the quality and faithfulness of summaries across various LLM backbones without modifying their weights.
%R 10.18653/v1/2025.acl-long.1236
%U https://aclanthology.org/2025.acl-long.1236/
%U https://doi.org/10.18653/v1/2025.acl-long.1236
%P 25450-25463
Markdown (Informal)
[SARA: Salience-Aware Reinforced Adaptive Decoding for Large Language Models in Abstractive Summarization](https://aclanthology.org/2025.acl-long.1236/) (Liu et al., ACL 2025)
ACL
- Nayu Liu, Junnan Zhu, Yiming Ma, Zhicong Lu, Wenlei Xu, Yong Yang, Jiang Zhong, and Kaiwen Wei. 2025. SARA: Salience-Aware Reinforced Adaptive Decoding for Large Language Models in Abstractive Summarization. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 25450–25463, Vienna, Austria. Association for Computational Linguistics.