@inproceedings{niwa-etal-2025-rectifying,
title = "Rectifying Belief Space via Unlearning to Harness {LLM}s' Reasoning",
author = "Niwa, Ayana and
Kaneko, Masahiro and
Inui, Kentaro",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.1285/",
doi = "10.18653/v1/2025.findings-acl.1285",
pages = "25060--25075",
ISBN = "979-8-89176-256-5",
abstract = "Large Language Models (LLMs) exhibit sophisticated reasoning yet still generate incorrect answers. We attribute these errors to **Spurious Beliefs**, defined as propositions the model internally considers as true despite being factually false. To reduce reasoning errors, we propose a belief space rectification framework. Our method first identifies the beliefs invoked during inference via an explanation{-}based approach with Forward{-}Backward Beam Search (FBBS). We subsequently apply unlearning via gradient ascent to suppress spurious beliefs and enhance true ones, thereby effectively rectifying the model{'}s belief space. Experiments on three QA datasets and three LLMs show that our method significantly reduces erroneous reasoning and improves generalization."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="niwa-etal-2025-rectifying">
<titleInfo>
<title>Rectifying Belief Space via Unlearning to Harness LLMs’ Reasoning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ayana</namePart>
<namePart type="family">Niwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masahiro</namePart>
<namePart type="family">Kaneko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Large Language Models (LLMs) exhibit sophisticated reasoning yet still generate incorrect answers. We attribute these errors to **Spurious Beliefs**, defined as propositions the model internally considers as true despite being factually false. To reduce reasoning errors, we propose a belief space rectification framework. Our method first identifies the beliefs invoked during inference via an explanation-based approach with Forward-Backward Beam Search (FBBS). We subsequently apply unlearning via gradient ascent to suppress spurious beliefs and enhance true ones, thereby effectively rectifying the model’s belief space. Experiments on three QA datasets and three LLMs show that our method significantly reduces erroneous reasoning and improves generalization.</abstract>
<identifier type="citekey">niwa-etal-2025-rectifying</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.1285</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.1285/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>25060</start>
<end>25075</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Rectifying Belief Space via Unlearning to Harness LLMs’ Reasoning
%A Niwa, Ayana
%A Kaneko, Masahiro
%A Inui, Kentaro
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F niwa-etal-2025-rectifying
%X Large Language Models (LLMs) exhibit sophisticated reasoning yet still generate incorrect answers. We attribute these errors to **Spurious Beliefs**, defined as propositions the model internally considers as true despite being factually false. To reduce reasoning errors, we propose a belief space rectification framework. Our method first identifies the beliefs invoked during inference via an explanation-based approach with Forward-Backward Beam Search (FBBS). We subsequently apply unlearning via gradient ascent to suppress spurious beliefs and enhance true ones, thereby effectively rectifying the model’s belief space. Experiments on three QA datasets and three LLMs show that our method significantly reduces erroneous reasoning and improves generalization.
%R 10.18653/v1/2025.findings-acl.1285
%U https://aclanthology.org/2025.findings-acl.1285/
%U https://doi.org/10.18653/v1/2025.findings-acl.1285
%P 25060-25075
Markdown (Informal)
[Rectifying Belief Space via Unlearning to Harness LLMs’ Reasoning](https://aclanthology.org/2025.findings-acl.1285/) (Niwa et al., Findings 2025)
ACL
Ayana Niwa, Masahiro Kaneko, and Kentaro Inui. 2025. Rectifying Belief Space via Unlearning to Harness LLMs’ Reasoning. In Findings of the Association for Computational Linguistics: ACL 2025, pages 25060–25075, Vienna, Austria. Association for Computational Linguistics.
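
The abstract above mentions unlearning via gradient ascent to suppress spurious beliefs while reinforcing true ones. The snippet below is a minimal, hypothetical sketch of that general idea only, not the authors' released code: the model name, example sentences, learning rate, and helper function are placeholders, and the belief-identification step (FBBS) is not shown.

```python
# Hypothetical sketch (not the paper's implementation): one update that
# applies gradient ASCENT on a spurious-belief statement and gradient
# descent on its corrected counterpart, using a causal LM loss.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "gpt2"  # placeholder; the paper evaluates three different LLMs
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)  # illustrative LR

spurious = "Penguins can fly."        # belief to suppress (illustrative)
true_belief = "Penguins cannot fly."  # belief to reinforce (illustrative)

def lm_loss(text: str) -> torch.Tensor:
    """Standard next-token cross-entropy loss on a single string."""
    batch = tokenizer(text, return_tensors="pt")
    return model(**batch, labels=batch["input_ids"]).loss

model.train()
optimizer.zero_grad()
# Negating the loss on the spurious statement turns descent into ascent,
# pushing probability mass away from it; the true statement keeps the
# ordinary descent objective.
loss = -lm_loss(spurious) + lm_loss(true_belief)
loss.backward()
optimizer.step()
```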