@inproceedings{guo-etal-2026-failures,
title = "Failures are Treasures: Constructing a Pedagogical Bridge for Agentic Strategy Distillation",
author = "Guo, Jiaxin and
Sun, Hao and
Zhang, Wenhao and
Yang, Chunyu and
Zhang, Yan",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.938/",
pages = "18808--18823",
ISBN = "979-8-89176-395-1",
abstract = {While Large Language Models (LLMs) excel in autonomous agent settings, small language models (SLMs) remain fragile, often collapsing after encountering errors. Traditional knowledge distillation focuses on imitating successful trajectories, while existing ``learning from mistakes'' methods treat errors as auxiliary signals rather than states requiring recoverable policies, leaving the dynamics of failure and recovery in agent settings largely unexplored. Inspired by Donald Sch{\"o}n{'}s theory of reflective practice, we propose P-BRIDGE (Pedagogical Bridge for Reflective Insight and Distillation of Guiding Errors). P-BRIDGE combines reflection-in-action with reflection-on-action, enabling agents to diagnose and correct critical errors during execution while abstracting transferable strategies from contrastive student{--}teacher trajectories. Experiments across eight benchmarks demonstrate that P-BRIDGE significantly elevates SLM performance{---}e.g., raising the 2WikiMultiHopQA accuracy of a 0.6B model from 6.2{\%} to 34.2{\%}.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="guo-etal-2026-failures">
<titleInfo>
<title>Failures are Treasures: Constructing a Pedagogical Bridge for Agentic Strategy Distillation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jiaxin</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hao</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenhao</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chunyu</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>While Large Language Models (LLMs) excel in autonomous agent settings, small language models (SLMs) remain fragile, often collapsing after encountering errors. Traditional knowledge distillation focuses on imitating successful trajectories, while existing “learning from mistakes” methods treat errors as auxiliary signals rather than states requiring recoverable policies, leaving the dynamics of failure and recovery in agent settings largely unexplored. Inspired by Donald Schön’s theory of reflective practice, we propose P-BRIDGE (Pedagogical Bridge for Reflective Insight and Distillation of Guiding Errors). P-BRIDGE combines reflection-in-action with reflection-on-action, enabling agents to diagnose and correct critical errors during execution while abstracting transferable strategies from contrastive student–teacher trajectories. Experiments across eight benchmarks demonstrate that P-BRIDGE significantly elevates SLM performance—e.g., raising the 2WikiMultiHopQA accuracy of a 0.6B model from 6.2% to 34.2%.</abstract>
<identifier type="citekey">guo-etal-2026-failures</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.938/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>18808</start>
<end>18823</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Failures are Treasures: Constructing a Pedagogical Bridge for Agentic Strategy Distillation
%A Guo, Jiaxin
%A Sun, Hao
%A Zhang, Wenhao
%A Yang, Chunyu
%A Zhang, Yan
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F guo-etal-2026-failures
%X While Large Language Models (LLMs) excel in autonomous agent settings, small language models (SLMs) remain fragile, often collapsing after encountering errors. Traditional knowledge distillation focuses on imitating successful trajectories, while existing “learning from mistakes” methods treat errors as auxiliary signals rather than states requiring recoverable policies, leaving the dynamics of failure and recovery in agent settings largely unexplored. Inspired by Donald Schön’s theory of reflective practice, we propose P-BRIDGE (Pedagogical Bridge for Reflective Insight and Distillation of Guiding Errors). P-BRIDGE combines reflection-in-action with reflection-on-action, enabling agents to diagnose and correct critical errors during execution while abstracting transferable strategies from contrastive student–teacher trajectories. Experiments across eight benchmarks demonstrate that P-BRIDGE significantly elevates SLM performance—e.g., raising the 2WikiMultiHopQA accuracy of a 0.6B model from 6.2% to 34.2%.
%U https://aclanthology.org/2026.findings-acl.938/
%P 18808-18823
Markdown (Informal)
[Failures are Treasures: Constructing a Pedagogical Bridge for Agentic Strategy Distillation](https://aclanthology.org/2026.findings-acl.938/) (Guo et al., Findings 2026)
ACL