@inproceedings{wu-etal-2026-enhancing,
title = "Enhancing Long-Chain Reasoning Distillation through Error-Aware Self-Reflection",
author = "Wu, Zhuoyang and
Li, Xinze and
Liu, Zhenghao and
Yan, Yukun and
Liu, Zhiyuan and
Yu, Minghe and
Yang, Cheng and
Gu, Yu and
Yu, Ge and
Sun, Maosong",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.72/",
pages = "1465--1482",
ISBN = "979-8-89176-395-1",
abstract = "Large Language Models (LLMs) have exhibited strong reasoning capabilities and achieved remarkable performance in mathematical problem-solving tasks. Recently, distilling reasoning ability from long-form Chains-of-Thought (CoTs) has emerged as a promising approach for enhancing Small Language Models (SLMs). Existing studies typically treat SLMs as student models and use long-form CoTs as supervision signals for Supervised Fine-Tuning (SFT) to transfer reasoning ability. However, such long-form CoT teachers are usually unaware of the student model{'}s capacity, which limits the effective utilization of the provided reasoning traces. To overcome this limitation, we propose error-aware self-reflection (ORION), a framework that refines teacher CoTs through an Error-Aware Reflection process. ORION enables the student model to construct more tailored teacher CoTs by refining teacher CoTs and incorporating its own reasoning errors. Experiments on multiple mathematical reasoning benchmarks demonstrate that ORION consistently improves performance by more than 2{\%} over all baselines. Further analysis reveals that the CoTs constructed by ORION exhibit higher coherence and logical consistency, thereby serving as more effective supervision signals for SFT. All codes are available at https://github.com/NEUIR/ORION."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wu-etal-2026-enhancing">
<titleInfo>
<title>Enhancing Long-Chain Reasoning Distillation through Error-Aware Self-Reflection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhuoyang</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinze</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenghao</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yukun</namePart>
<namePart type="family">Yan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiyuan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minghe</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Gu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ge</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Large Language Models (LLMs) have exhibited strong reasoning capabilities and achieved remarkable performance in mathematical problem-solving tasks. Recently, distilling reasoning ability from long-form Chains-of-Thought (CoTs) has emerged as a promising approach for enhancing Small Language Models (SLMs). Existing studies typically treat SLMs as student models and use long-form CoTs as supervision signals for Supervised Fine-Tuning (SFT) to transfer reasoning ability. However, such long-form CoT teachers are usually unaware of the student model’s capacity, which limits the effective utilization of the provided reasoning traces. To overcome this limitation, we propose error-aware self-reflection (ORION), a framework that refines teacher CoTs through an Error-Aware Reflection process. ORION enables the student model to construct more tailored teacher CoTs by refining teacher CoTs and incorporating its own reasoning errors. Experiments on multiple mathematical reasoning benchmarks demonstrate that ORION consistently improves performance by more than 2% over all baselines. Further analysis reveals that the CoTs constructed by ORION exhibit higher coherence and logical consistency, thereby serving as more effective supervision signals for SFT. All codes are available at https://github.com/NEUIR/ORION.</abstract>
<identifier type="citekey">wu-etal-2026-enhancing</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.72/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>1465</start>
<end>1482</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Enhancing Long-Chain Reasoning Distillation through Error-Aware Self-Reflection
%A Wu, Zhuoyang
%A Li, Xinze
%A Liu, Zhenghao
%A Yan, Yukun
%A Liu, Zhiyuan
%A Yu, Minghe
%A Yang, Cheng
%A Gu, Yu
%A Yu, Ge
%A Sun, Maosong
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F wu-etal-2026-enhancing
%X Large Language Models (LLMs) have exhibited strong reasoning capabilities and achieved remarkable performance in mathematical problem-solving tasks. Recently, distilling reasoning ability from long-form Chains-of-Thought (CoTs) has emerged as a promising approach for enhancing Small Language Models (SLMs). Existing studies typically treat SLMs as student models and use long-form CoTs as supervision signals for Supervised Fine-Tuning (SFT) to transfer reasoning ability. However, such long-form CoT teachers are usually unaware of the student model’s capacity, which limits the effective utilization of the provided reasoning traces. To overcome this limitation, we propose error-aware self-reflection (ORION), a framework that refines teacher CoTs through an Error-Aware Reflection process. ORION enables the student model to construct more tailored teacher CoTs by refining teacher CoTs and incorporating its own reasoning errors. Experiments on multiple mathematical reasoning benchmarks demonstrate that ORION consistently improves performance by more than 2% over all baselines. Further analysis reveals that the CoTs constructed by ORION exhibit higher coherence and logical consistency, thereby serving as more effective supervision signals for SFT. All codes are available at https://github.com/NEUIR/ORION.
%U https://aclanthology.org/2026.findings-acl.72/
%P 1465-1482
Markdown (Informal)
[Enhancing Long-Chain Reasoning Distillation through Error-Aware Self-Reflection](https://aclanthology.org/2026.findings-acl.72/) (Wu et al., Findings 2026)
ACL
- Zhuoyang Wu, Xinze Li, Zhenghao Liu, Yukun Yan, Zhiyuan Liu, Minghe Yu, Cheng Yang, Yu Gu, Ge Yu, and Maosong Sun. 2026. Enhancing Long-Chain Reasoning Distillation through Error-Aware Self-Reflection. In Findings of the Association for Computational Linguistics: ACL 2026, pages 1465–1482, San Diego, California, United States. Association for Computational Linguistics.