@inproceedings{do-etal-2025-sample,
title = "Sample Efficient Alignment Learning With Episodic Control",
author = "Do, Van Dai and
Tran, Quan Hung and
Kirmani, Ahmed and
Zhang, Lu and
Le, Hung",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.560/",
pages = "10601--10618",
ISBN = "979-8-89176-335-7",
abstract = "Aligning large language models (LLMs) with specific task objectives is challenging, especially when access to feedback signals for guiding the model is limited. While existing parametric methods perform reasonably, they rely heavily on large datasets and frequent feedback, making them impractical in scenarios with limited human feedback. We introduce Alignment Learning with Episodic Control (ALEC), a non-parametric framework that aligns LLM outputs during inference without fine-tuning. ALEC employs a key-value memory to store the associations between generated text and its corresponding values. It leverages a novel confidence-based writing scheme to update these stored values, maximizing the use of available data. During inference, ALEC utilizes a nearest-neighbor mechanism to estimate the values of generated texts, enabling the selection of the optimal text for decoding. Our method outperforms state-of-the-art baselines on harmless, helpful, and summarization tasks, demonstrating improved alignment with minimal interactions with the true reward model."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="do-etal-2025-sample">
<titleInfo>
<title>Sample Efficient Alignment Learning With Episodic Control</title>
</titleInfo>
<name type="personal">
<namePart type="given">Van</namePart>
<namePart type="given">Dai</namePart>
<namePart type="family">Do</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Quan</namePart>
<namePart type="given">Hung</namePart>
<namePart type="family">Tran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Kirmani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hung</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Aligning large language models (LLMs) with specific task objectives is challenging, especially when access to feedback signals for guiding the model is limited. While existing parametric methods perform reasonably, they rely heavily on large datasets and frequent feedback, making them impractical in scenarios with limited human feedback. We introduce Alignment Learning with Episodic Control (ALEC), a non-parametric framework that aligns LLM outputs during inference without fine-tuning. ALEC employs a key-value memory to store the associations between generated text and its corresponding values. It leverages a novel confidence-based writing scheme to update these stored values, maximizing the use of available data. During inference, ALEC utilizes a nearest-neighbor mechanism to estimate the values of generated texts, enabling the selection of the optimal text for decoding. Our method outperforms state-of-the-art baselines on harmless, helpful, and summarization tasks, demonstrating improved alignment with minimal interactions with the true reward model.</abstract>
<identifier type="citekey">do-etal-2025-sample</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.560/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>10601</start>
<end>10618</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sample Efficient Alignment Learning With Episodic Control
%A Do, Van Dai
%A Tran, Quan Hung
%A Kirmani, Ahmed
%A Zhang, Lu
%A Le, Hung
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F do-etal-2025-sample
%X Aligning large language models (LLMs) with specific task objectives is challenging, especially when access to feedback signals for guiding the model is limited. While existing parametric methods perform reasonably, they rely heavily on large datasets and frequent feedback, making them impractical in scenarios with limited human feedback. We introduce Alignment Learning with Episodic Control (ALEC), a non-parametric framework that aligns LLM outputs during inference without fine-tuning. ALEC employs a key-value memory to store the associations between generated text and its corresponding values. It leverages a novel confidence-based writing scheme to update these stored values, maximizing the use of available data. During inference, ALEC utilizes a nearest-neighbor mechanism to estimate the values of generated texts, enabling the selection of the optimal text for decoding. Our method outperforms state-of-the-art baselines on harmless, helpful, and summarization tasks, demonstrating improved alignment with minimal interactions with the true reward model.
%U https://aclanthology.org/2025.findings-emnlp.560/
%P 10601-10618
Markdown (Informal)
[Sample Efficient Alignment Learning With Episodic Control](https://aclanthology.org/2025.findings-emnlp.560/) (Do et al., Findings 2025)
ACL
Van Dai Do, Quan Hung Tran, Ahmed Kirmani, Lu Zhang, and Hung Le. 2025. Sample Efficient Alignment Learning With Episodic Control. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 10601–10618, Suzhou, China. Association for Computational Linguistics.
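
For readers skimming the abstract above, the following is a minimal, purely illustrative sketch of the kind of key-value episodic memory with nearest-neighbor value estimation it describes: stored (embedding, value) pairs are queried at decoding time to score candidate texts, and the highest-valued candidate is selected. The embedding function, the choice of k, the L2 distance, and the averaging rule here are all assumptions for the sketch, not ALEC's actual implementation (which additionally uses a confidence-based writing scheme).

```python
# Illustrative sketch only: a toy key-value episodic memory with
# nearest-neighbor value estimation, in the spirit of the mechanism
# the ALEC abstract describes. All design choices here (embedding,
# k, distance, averaging) are hypothetical, not the paper's method.
import numpy as np

class EpisodicValueMemory:
    def __init__(self, k: int = 3):
        self.keys = []    # embeddings of previously scored texts
        self.values = []  # stored value estimates for those texts
        self.k = k

    def write(self, key: np.ndarray, value: float) -> None:
        # Store one (embedding, value) association.
        self.keys.append(key)
        self.values.append(value)

    def estimate(self, query: np.ndarray) -> float:
        # Average the values of the k nearest stored keys (L2 distance).
        dists = np.linalg.norm(np.array(self.keys) - query, axis=1)
        nearest = np.argsort(dists)[: self.k]
        return float(np.mean(np.array(self.values)[nearest]))

def embed(text: str, dim: int = 16) -> np.ndarray:
    # Placeholder embedding (hash-seeded random vector); a real system
    # would use a learned text encoder here.
    rng = np.random.default_rng(abs(hash(text)) % (2**32))
    return rng.standard_normal(dim)

if __name__ == "__main__":
    memory = EpisodicValueMemory(k=2)
    # Pretend these texts were scored by a reward model earlier.
    for text, reward in [("helpful answer", 0.9),
                         ("rude answer", 0.1),
                         ("polite answer", 0.8)]:
        memory.write(embed(text), reward)
    # At decoding time, pick the candidate with the highest estimated value.
    candidates = ["another helpful answer", "another rude answer"]
    best = max(candidates, key=lambda c: memory.estimate(embed(c)))
    print("selected:", best)
```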