% BibTeX record for this paper. The same reference follows below as MODS XML,
% an EndNote tagged record, and a plain-text citation.
@inproceedings{guan-etal-2026-knowing,
  title     = {Knowing When to Quit: Diagnosing and Training {LLMs} to Abort Futile Reasoning},
  author    = {Guan, Xinyan and
               Zeng, Jiali and
               Xin, Chunlei and
               Lu, Yaojie and
               Lin, Hongyu and
               Han, Xianpei and
               Sun, Le and
               Meng, Fandong},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.830/},
  pages     = {16823--16835},
  isbn      = {979-8-89176-395-1},
  abstract  = {Large language models generate computationally expensive yet semantically void reasoning on beyond-capability tasks, creating safety risks where plausible-sounding but incorrect derivations mislead users. We characterize this futile reasoning phenomenon through systematic analysis, revealing universal capability overreach and systematic miscalibration towards over-confidence. The dominant failure mode is specious reasoning, superficially valid outputs with subtle hallucinations, which escalates with task difficulty. We demonstrate that prompt engineering proves insufficient to calibrate refusal behavior. To address this, we introduce CaRL (Capability-aligned Reinforcement Learning), which aligns model behavior with capability boundaries through reward shaping that incentivizes refusal over hallucination and hindsight augmentation that converts failures into refusal supervision. Experiments demonstrate a substantial reduction in futile reasoning while preserving performance across task difficulties, effectively achieving capability-aligned behavior without sacrificing utility.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="guan-etal-2026-knowing">
    <titleInfo>
      <title>Knowing When to Quit: Diagnosing and Training LLMs to Abort Futile Reasoning</title>
    </titleInfo>

    <!-- Paper authors, one <name> element each. -->
    <name type="personal">
      <namePart type="given">Xinyan</namePart>
      <namePart type="family">Guan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jiali</namePart>
      <namePart type="family">Zeng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chunlei</namePart>
      <namePart type="family">Xin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yaojie</namePart>
      <namePart type="family">Lu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hongyu</namePart>
      <namePart type="family">Lin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xianpei</namePart>
      <namePart type="family">Han</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Le</namePart>
      <namePart type="family">Sun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fandong</namePart>
      <namePart type="family">Meng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>

    <abstract>Large language models generate computationally expensive yet semantically void reasoning on beyond-capability tasks, creating safety risks where plausible-sounding but incorrect derivations mislead users. We characterize this futile reasoning phenomenon through systematic analysis, revealing universal capability overreach and systematic miscalibration towards over-confidence. The dominant failure mode is specious reasoning, superficially valid outputs with subtle hallucinations, which escalates with task difficulty. We demonstrate that prompt engineering proves insufficient to calibrate refusal behavior. To address this, we introduce CaRL (Capability-aligned Reinforcement Learning), which aligns model behavior with capability boundaries through reward shaping that incentivizes refusal over hallucination and hindsight augmentation that converts failures into refusal supervision. Experiments demonstrate a substantial reduction in futile reasoning while preserving performance across task difficulties, effectively achieving capability-aligned behavior without sacrificing utility.</abstract>
    <identifier type="citekey">guan-etal-2026-knowing</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.830/</url>
    </location>

    <!-- Issue date and page extent of the paper within the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>16823</start>
        <end>16835</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Knowing When to Quit: Diagnosing and Training LLMs to Abort Futile Reasoning
%A Guan, Xinyan
%A Zeng, Jiali
%A Xin, Chunlei
%A Lu, Yaojie
%A Lin, Hongyu
%A Han, Xianpei
%A Sun, Le
%A Meng, Fandong
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F guan-etal-2026-knowing
%X Large language models generate computationally expensive yet semantically void reasoning on beyond-capability tasks, creating safety risks where plausible-sounding but incorrect derivations mislead users. We characterize this futile reasoning phenomenon through systematic analysis, revealing universal capability overreach and systematic miscalibration towards over-confidence. The dominant failure mode is specious reasoning, superficially valid outputs with subtle hallucinations, which escalates with task difficulty. We demonstrate that prompt engineering proves insufficient to calibrate refusal behavior. To address this, we introduce CaRL (Capability-aligned Reinforcement Learning), which aligns model behavior with capability boundaries through reward shaping that incentivizes refusal over hallucination and hindsight augmentation that converts failures into refusal supervision. Experiments demonstrate a substantial reduction in futile reasoning while preserving performance across task difficulties, effectively achieving capability-aligned behavior without sacrificing utility.
%U https://aclanthology.org/2026.findings-acl.830/
%P 16823-16835
Markdown (Informal)
[Knowing When to Quit: Diagnosing and Training LLMs to Abort Futile Reasoning](https://aclanthology.org/2026.findings-acl.830/) (Guan et al., Findings 2026)
ACL
- Xinyan Guan, Jiali Zeng, Chunlei Xin, Yaojie Lu, Hongyu Lin, Xianpei Han, Le Sun, and Fandong Meng. 2026. Knowing When to Quit: Diagnosing and Training LLMs to Abort Futile Reasoning. In Findings of the Association for Computational Linguistics: ACL 2026, pages 16823–16835, San Diego, California, United States. Association for Computational Linguistics.