@inproceedings{mitsuhashi-etal-2026-disentangling,
title = "Disentangling the Effects of Unlearning in Measuring Parametric Faithfulness of Chain-of-Thought",
author = "Mitsuhashi, Ryo and
Morio, Gaku and
Niwa, Ayana and
Kaneko, Masahiro and
Inui, Kentaro and
Morishita, Terufumi and
Koreeda, Yuta and
Sogawa, Yasuhiro",
editor = "T.Y.S.S., Santosh and
Rodriguez, Juan Diego and
de Gibert, Ona",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics ({ACL} 2026)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-srw.36/",
pages = "413--419",
ISBN = "979-8-89176-393-7",
abstract = "Chain-of-Thought (CoT) in large language models (LLMs) has been widely debated in terms of whether it faithfully reflects an internal reasoning process of models. Parametric faithfulness is a recently proposed metric that uses unlearning to assess whether a model encodes parametric beliefs corresponding to a reasoning chain. This paper refines this metric by accounting for the unintended artifacts of unlearning. We introduce control tasks that unlearn irrelevant knowledge and word-shuffled content and show that these control tasks yield substantial parametric faithfulness values, suggesting the non-negligible effect of unlearning. We also found that control tasks help explain the significant variations in parametric faithfulness observed across different model sizes and CoT lengths. We conclude that the effects of unlearning need to be considered when measuring parametric faithfulness."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mitsuhashi-etal-2026-disentangling">
<titleInfo>
<title>Disentangling the Effects of Unlearning in Measuring Parametric Faithfulness of Chain-of-Thought</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ryo</namePart>
<namePart type="family">Mitsuhashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaku</namePart>
<namePart type="family">Morio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ayana</namePart>
<namePart type="family">Niwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masahiro</namePart>
<namePart type="family">Kaneko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Terufumi</namePart>
<namePart type="family">Morishita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuta</namePart>
<namePart type="family">Koreeda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yasuhiro</namePart>
<namePart type="family">Sogawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Santosh</namePart>
<namePart type="family">T.Y.S.S.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="given">Diego</namePart>
<namePart type="family">Rodriguez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ona</namePart>
<namePart type="family">de Gibert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-393-7</identifier>
</relatedItem>
<abstract>Chain-of-Thought (CoT) in large language models (LLMs) has been widely debated in terms of whether it faithfully reflects an internal reasoning process of models. Parametric faithfulness is a recently proposed metric that uses unlearning to assess whether a model encodes parametric beliefs corresponding to a reasoning chain. This paper refines this metric by accounting for the unintended artifacts of unlearning. We introduce control tasks that unlearn irrelevant knowledge and word-shuffled content and show that these control tasks yield substantial parametric faithfulness values, suggesting the non-negligible effect of unlearning. We also found that control tasks help explain the significant variations in parametric faithfulness observed across different model sizes and CoT lengths. We conclude that the effects of unlearning need to be considered when measuring parametric faithfulness.</abstract>
<identifier type="citekey">mitsuhashi-etal-2026-disentangling</identifier>
<location>
<url>https://aclanthology.org/2026.acl-srw.36/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>413</start>
<end>419</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Disentangling the Effects of Unlearning in Measuring Parametric Faithfulness of Chain-of-Thought
%A Mitsuhashi, Ryo
%A Morio, Gaku
%A Niwa, Ayana
%A Kaneko, Masahiro
%A Inui, Kentaro
%A Morishita, Terufumi
%A Koreeda, Yuta
%A Sogawa, Yasuhiro
%Y T.Y.S.S., Santosh
%Y Rodriguez, Juan Diego
%Y de Gibert, Ona
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-393-7
%F mitsuhashi-etal-2026-disentangling
%X Chain-of-Thought (CoT) in large language models (LLMs) has been widely debated in terms of whether it faithfully reflects an internal reasoning process of models. Parametric faithfulness is a recently proposed metric that uses unlearning to assess whether a model encodes parametric beliefs corresponding to a reasoning chain. This paper refines this metric by accounting for the unintended artifacts of unlearning. We introduce control tasks that unlearn irrelevant knowledge and word-shuffled content and show that these control tasks yield substantial parametric faithfulness values, suggesting the non-negligible effect of unlearning. We also found that control tasks help explain the significant variations in parametric faithfulness observed across different model sizes and CoT lengths. We conclude that the effects of unlearning need to be considered when measuring parametric faithfulness.
%U https://aclanthology.org/2026.acl-srw.36/
%P 413-419
Markdown (Informal)
[Disentangling the Effects of Unlearning in Measuring Parametric Faithfulness of Chain-of-Thought](https://aclanthology.org/2026.acl-srw.36/) (Mitsuhashi et al., ACL 2026)
ACL
- Ryo Mitsuhashi, Gaku Morio, Ayana Niwa, Masahiro Kaneko, Kentaro Inui, Terufumi Morishita, Yuta Koreeda, and Yasuhiro Sogawa. 2026. Disentangling the Effects of Unlearning in Measuring Parametric Faithfulness of Chain-of-Thought. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026), pages 413–419, San Diego, California, United States. Association for Computational Linguistics.