@inproceedings{choi-etal-2025-think,
title = "Think Clearly: Improving Reasoning via Redundant Token Pruning",
author = "Choi, Daewon and
Lee, Jimin and
Tack, Jihoon and
Song, Woomin and
Dingliwal, Saket and
Jayanthi, Sai Muralidhar and
Ganesh, Bhavana and
Shin, Jinwoo and
Galstyan, Aram and
Bodapati, Sravan Babu",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.1169/",
pages = "21437--21451",
ISBN = "979-8-89176-335-7",
abstract = "Recent large language models have shown promising capabilities in long-form reasoning, following structured chains of thought before arriving at a final answer. However, we observe that these reasoning paths tend to include substantial redundancy; analyzing attention patterns reveals that attention scores are widely scattered, and incorrect answers in particular exhibit greater attention sparsity. In this paper, we demonstrate that deliberately removing this redundancy in the reasoning process significantly improves the performance through clear thinking (i.e., removing distraction). Specifically, we systematically identify such redundancy by measuring token-level attention scores to a special end-of-thinking token, which is appended to an explicit instruction inserted to conclude each intermediate reasoning step. Furthermore, we propose structure-aware pruning that prioritizes removing tokens in low-contributing reasoning chunks over individual tokens. After evicting redundant tokens, we remove the injected end-of-thinking instruction, then resume the reasoning generation. We demonstrate that our method significantly improves the overall accuracy across reasoning-intensive benchmarks without any training involved. In particular, our method shows strong performance on challenging mathematics competition benchmarks such as AIME and AMC, where reasoning redundancy is more prevalent."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="choi-etal-2025-think">
<titleInfo>
<title>Think Clearly: Improving Reasoning via Redundant Token Pruning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daewon</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jimin</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jihoon</namePart>
<namePart type="family">Tack</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Woomin</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saket</namePart>
<namePart type="family">Dingliwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sai</namePart>
<namePart type="given">Muralidhar</namePart>
<namePart type="family">Jayanthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bhavana</namePart>
<namePart type="family">Ganesh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinwoo</namePart>
<namePart type="family">Shin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aram</namePart>
<namePart type="family">Galstyan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sravan</namePart>
<namePart type="given">Babu</namePart>
<namePart type="family">Bodapati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Recent large language models have shown promising capabilities in long-form reasoning, following structured chains of thought before arriving at a final answer. However, we observe that these reasoning paths tend to include substantial redundancy; analyzing attention patterns reveals that attention scores are widely scattered, and incorrect answers in particular exhibit greater attention sparsity. In this paper, we demonstrate that deliberately removing this redundancy in the reasoning process significantly improves the performance through clear thinking (i.e., removing distraction). Specifically, we systematically identify such redundancy by measuring token-level attention scores to a special end-of-thinking token, which is appended to an explicit instruction inserted to conclude each intermediate reasoning step. Furthermore, we propose structure-aware pruning that prioritizes removing tokens in low-contributing reasoning chunks over individual tokens. After evicting redundant tokens, we remove the injected end-of-thinking instruction, then resume the reasoning generation. We demonstrate that our method significantly improves the overall accuracy across reasoning-intensive benchmarks without any training involved. In particular, our method shows strong performance on challenging mathematics competition benchmarks such as AIME and AMC, where reasoning redundancy is more prevalent.</abstract>
<identifier type="citekey">choi-etal-2025-think</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.1169/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>21437</start>
<end>21451</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Think Clearly: Improving Reasoning via Redundant Token Pruning
%A Choi, Daewon
%A Lee, Jimin
%A Tack, Jihoon
%A Song, Woomin
%A Dingliwal, Saket
%A Jayanthi, Sai Muralidhar
%A Ganesh, Bhavana
%A Shin, Jinwoo
%A Galstyan, Aram
%A Bodapati, Sravan Babu
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F choi-etal-2025-think
%X Recent large language models have shown promising capabilities in long-form reasoning, following structured chains of thought before arriving at a final answer. However, we observe that these reasoning paths tend to include substantial redundancy; analyzing attention patterns reveals that attention scores are widely scattered, and incorrect answers in particular exhibit greater attention sparsity. In this paper, we demonstrate that deliberately removing this redundancy in the reasoning process significantly improves the performance through clear thinking (i.e., removing distraction). Specifically, we systematically identify such redundancy by measuring token-level attention scores to a special end-of-thinking token, which is appended to an explicit instruction inserted to conclude each intermediate reasoning step. Furthermore, we propose structure-aware pruning that prioritizes removing tokens in low-contributing reasoning chunks over individual tokens. After evicting redundant tokens, we remove the injected end-of-thinking instruction, then resume the reasoning generation. We demonstrate that our method significantly improves the overall accuracy across reasoning-intensive benchmarks without any training involved. In particular, our method shows strong performance on challenging mathematics competition benchmarks such as AIME and AMC, where reasoning redundancy is more prevalent.
%U https://aclanthology.org/2025.findings-emnlp.1169/
%P 21437-21451
Markdown (Informal)
[Think Clearly: Improving Reasoning via Redundant Token Pruning](https://aclanthology.org/2025.findings-emnlp.1169/) (Choi et al., Findings 2025)
ACL
- Daewon Choi, Jimin Lee, Jihoon Tack, Woomin Song, Saket Dingliwal, Sai Muralidhar Jayanthi, Bhavana Ganesh, Jinwoo Shin, Aram Galstyan, and Sravan Babu Bodapati. 2025. Think Clearly: Improving Reasoning via Redundant Token Pruning. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 21437–21451, Suzhou, China. Association for Computational Linguistics.