% Conference-paper record (citation key: liu-etal-2026-pru).
% Case-sensitive words are protected with whole-word braces so that
% sentence-casing bibliography styles keep "Pru-CoT", "ACL", etc. intact.
@inproceedings{liu-etal-2026-pru,
  title     = {{Pru-CoT}: Towards Efficient Reasoning Distillation via Pruning Chain-of-Thought},
  author    = {Liu, Han and
               Ma, Shuotian and
               Li, Hui and
               Zhang, Xiaotong and
               Ma, Fenglong and
               Yu, Hong},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.1684/},
  pages     = {33744--33756},
  isbn      = {979-8-89176-395-1},
  abstract  = {Knowledge distillation has emerged as a pivotal paradigm for transferring the superior reasoning capabilities of Large Reasoning Models (LRMs) to efficient student models. However, the raw Chain-of-Thought (CoT) trajectories are often verbose and redundant, which dilutes the underlying logic and hinders effective knowledge distillation for student models. Although recent work has focused on pruning CoT to streamline these reasoning paths, existing local heuristic methods often fail to capture global causal logic due to rigid rules and limited search spaces, while global heuristic approaches incur substantial computational costs. To address these issues, we propose Pru-CoT (Pruning Chain-of-Thought), a framework that aims to extract the essential logical structure from reasoning chains. Pru-CoT implements a step-level importance assessment via global optimization on a frozen student large language model (LLM), quantifying the gradient-based causal contribution of each component. Guided by these important signals, the framework performs fidelity-constrained pruning, utilizing an LLM-driven process to synthesize concise, logically coherent narratives. Extensive experiments on mathematical reasoning benchmarks demonstrate that models trained with Pru-CoT not only achieve superior accuracy but also generate significantly more compact reasoning paths compared to those trained on raw verbose data.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper with citation key liu-etal-2026-pru -->
  <mods ID="liu-etal-2026-pru">
    <titleInfo>
      <title>Pru-CoT: Towards Efficient Reasoning Distillation via Pruning Chain-of-Thought</title>
    </titleInfo>

    <!-- Authors, in the order listed -->
    <name type="personal">
      <namePart type="given">Han</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shuotian</namePart>
      <namePart type="family">Ma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hui</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiaotong</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fenglong</namePart>
      <namePart type="family">Ma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hong</namePart>
      <namePart type="family">Yu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume that contains this paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>

      <!-- Volume editors, in the order listed -->
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>

      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>

    <abstract>Knowledge distillation has emerged as a pivotal paradigm for transferring the superior reasoning capabilities of Large Reasoning Models (LRMs) to efficient student models. However, the raw Chain-of-Thought (CoT) trajectories are often verbose and redundant, which dilutes the underlying logic and hinders effective knowledge distillation for student models. Although recent work has focused on pruning CoT to streamline these reasoning paths, existing local heuristic methods often fail to capture global causal logic due to rigid rules and limited search spaces, while global heuristic approaches incur substantial computational costs. To address these issues, we propose Pru-CoT (Pruning Chain-of-Thought), a framework that aims to extract the essential logical structure from reasoning chains. Pru-CoT implements a step-level importance assessment via global optimization on a frozen student large language model (LLM), quantifying the gradient-based causal contribution of each component. Guided by these important signals, the framework performs fidelity-constrained pruning, utilizing an LLM-driven process to synthesize concise, logically coherent narratives. Extensive experiments on mathematical reasoning benchmarks demonstrate that models trained with Pru-CoT not only achieve superior accuracy but also generate significantly more compact reasoning paths compared to those trained on raw verbose data.</abstract>
    <identifier type="citekey">liu-etal-2026-pru</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.1684/</url>
    </location>

    <!-- Page range of this paper -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>33744</start>
        <end>33756</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Pru-CoT: Towards Efficient Reasoning Distillation via Pruning Chain-of-Thought
%A Liu, Han
%A Ma, Shuotian
%A Li, Hui
%A Zhang, Xiaotong
%A Ma, Fenglong
%A Yu, Hong
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F liu-etal-2026-pru
%X Knowledge distillation has emerged as a pivotal paradigm for transferring the superior reasoning capabilities of Large Reasoning Models (LRMs) to efficient student models. However, the raw Chain-of-Thought (CoT) trajectories are often verbose and redundant, which dilutes the underlying logic and hinders effective knowledge distillation for student models. Although recent work has focused on pruning CoT to streamline these reasoning paths, existing local heuristic methods often fail to capture global causal logic due to rigid rules and limited search spaces, while global heuristic approaches incur substantial computational costs. To address these issues, we propose Pru-CoT (Pruning Chain-of-Thought), a framework that aims to extract the essential logical structure from reasoning chains. Pru-CoT implements a step-level importance assessment via global optimization on a frozen student large language model (LLM), quantifying the gradient-based causal contribution of each component. Guided by these important signals, the framework performs fidelity-constrained pruning, utilizing an LLM-driven process to synthesize concise, logically coherent narratives. Extensive experiments on mathematical reasoning benchmarks demonstrate that models trained with Pru-CoT not only achieve superior accuracy but also generate significantly more compact reasoning paths compared to those trained on raw verbose data.
%U https://aclanthology.org/2026.findings-acl.1684/
%P 33744-33756
Markdown (Informal)
[Pru-CoT: Towards Efficient Reasoning Distillation via Pruning Chain-of-Thought](https://aclanthology.org/2026.findings-acl.1684/) (Liu et al., Findings 2026)
ACL