BibTeX
@inproceedings{fan-etal-2026-ketchup,
    title = "{KETCHUP}: K-Step Return Estimation for Sequential Knowledge Distillation",
    author = "Fan, Jiabin  and
      Luo, Guoqing  and
      Bowling, Michael  and
      Mou, Lili",
    editor = "Demberg, Vera  and
      Inui, Kentaro  and
      Marquez, Llu{\'i}s",
    booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {EACL} 2026",
    month = mar,
    year = "2026",
    address = "Rabat, Morocco",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.findings-eacl.39/",
    pages = "778--796",
    ISBN = "979-8-89176-386-9",
    abstract = "We propose a novel $K$-step return estimation method (called $K$ETCHUP) for Reinforcement Learning (RL)-based knowledge distillation (KD) in text generation tasks. Our idea is to induce a $K$-step return by using the Bellman Optimality Equation for multiple steps. Theoretical analysis shows that this $K$-step formulation reduces the variance of the gradient estimates, thus leading to improved RL optimization, especially when the student model size is large. Empirical evaluation on three text generation tasks demonstrates that our approach yields superior performance in both standard task metrics and large language model (LLM)-based evaluation. These results suggest that our $K$-step return induction offers a promising direction for enhancing RL-based KD in LLM research."
}

MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="fan-etal-2026-ketchup">
    <titleInfo>
      <title>KETCHUP: K-Step Return Estimation for Sequential Knowledge Distillation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Jiabin</namePart>
      <namePart type="family">Fan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Guoqing</namePart>
      <namePart type="family">Luo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Michael</namePart>
      <namePart type="family">Bowling</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lili</namePart>
      <namePart type="family">Mou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Vera</namePart>
        <namePart type="family">Demberg</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kentaro</namePart>
        <namePart type="family">Inui</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lluís</namePart>
        <namePart type="family">Marquez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-386-9</identifier>
    </relatedItem>
    <abstract>We propose a novel K-step return estimation method (called KETCHUP) for Reinforcement Learning (RL)-based knowledge distillation (KD) in text generation tasks. Our idea is to induce a K-step return by using the Bellman Optimality Equation for multiple steps. Theoretical analysis shows that this K-step formulation reduces the variance of the gradient estimates, thus leading to improved RL optimization, especially when the student model size is large. Empirical evaluation on three text generation tasks demonstrates that our approach yields superior performance in both standard task metrics and large language model (LLM)-based evaluation. These results suggest that our K-step return induction offers a promising direction for enhancing RL-based KD in LLM research.</abstract>
    <identifier type="citekey">fan-etal-2026-ketchup</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-eacl.39/</url>
    </location>
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>778</start>
        <end>796</end>
      </extent>
    </part>
  </mods>
</modsCollection>

Endnote
%0 Conference Proceedings
%T KETCHUP: K-Step Return Estimation for Sequential Knowledge Distillation
%A Fan, Jiabin
%A Luo, Guoqing
%A Bowling, Michael
%A Mou, Lili
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Findings of the Association for Computational Linguistics: EACL 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-386-9
%F fan-etal-2026-ketchup
%X We propose a novel K-step return estimation method (called KETCHUP) for Reinforcement Learning (RL)-based knowledge distillation (KD) in text generation tasks. Our idea is to induce a K-step return by using the Bellman Optimality Equation for multiple steps. Theoretical analysis shows that this K-step formulation reduces the variance of the gradient estimates, thus leading to improved RL optimization, especially when the student model size is large. Empirical evaluation on three text generation tasks demonstrates that our approach yields superior performance in both standard task metrics and large language model (LLM)-based evaluation. These results suggest that our K-step return induction offers a promising direction for enhancing RL-based KD in LLM research.
%U https://aclanthology.org/2026.findings-eacl.39/
%P 778-796
Markdown (Informal)
[KETCHUP: K-Step Return Estimation for Sequential Knowledge Distillation](https://aclanthology.org/2026.findings-eacl.39/) (Fan et al., Findings 2026)
ACL
Jiabin Fan, Guoqing Luo, Michael Bowling, and Lili Mou. 2026. KETCHUP: K-Step Return Estimation for Sequential Knowledge Distillation. In Findings of the Association for Computational Linguistics: EACL 2026, pages 778–796, Rabat, Morocco. Association for Computational Linguistics.
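
The abstract's central construction can be sketched in standard RL notation (a generic illustration, not necessarily the paper's exact formulation): unrolling the one-step Bellman Optimality Equation $Q^{*}(s_t, a_t) = r_t + \gamma \max_{a'} Q^{*}(s_{t+1}, a')$ for $K$ steps, assuming greedy intermediate actions, induces a $K$-step return target of the form

\[
  % K-step return from K applications of the Bellman optimality backup;
  % gamma is the discount factor and r_{t+k} the per-step reward
  % (generic symbols, not taken from the paper).
  \hat{G}_t^{(K)}
    = \sum_{k=0}^{K-1} \gamma^{k} r_{t+k}
    + \gamma^{K} \max_{a'} Q^{*}(s_{t+K}, a').
\]

Bootstrapping on $Q^{*}$ after $K$ sampled rewards, rather than summing rewards over the entire generated sequence, is the standard mechanism consistent with the gradient-variance reduction the abstract claims.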