@inproceedings{yang-etal-2026-agentic,
title = "Agentic Episodic Control",
author = "Yang, Xidong and
Li, Wenhao and
Sheng, Junjie and
Hua, Yun and
Chen, Haosheng and
Shen, Chuyun and
Wang, Xiangfeng",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.654/",
pages = "13355--13370",
ISBN = "979-8-89176-395-1",
abstract = "Reinforcement learning (RL) remains fundamentally limited by poor data efficiency and weak generalization. Prior episodic RL methods attempt to alleviate this via external memory modules, yet they suffer from two key limitations: a representation bottleneck caused by shallow encoders, and a retrieval dilemma where episodic memory is accessed indiscriminately. To address these challenges, we propose Agentic Episodic Control (AEC), a novel architecture that integrates large language models (LLMs) into episodic RL. AEC uses an LLM-based semantic augmenter to generate semantic representations from raw observations, and a critical state recognizer to selectively retrieve valuable experiences. This transforms memory usage from passive similarity matching into strategic, context-aware recall. Across five BabyAI-Text environments, AEC achieves 2{--}6{\texttimes} higher data efficiency than baselines and is the only method to solve complex tasks like UnlockLocal with over 90{\%} success. It further demonstrates strong cross-task and cross-environment generalization, maintaining performance even under distribution shifts. AEC shows that combining LLM-derived priors with reinforcement learning yields more sample-efficient and adaptable agents. Code is available at https://github.com/Xidong-Yang/Agentic{\_}Episodic{\_}Control."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yang-etal-2026-agentic">
<titleInfo>
<title>Agentic Episodic Control</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xidong</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenhao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junjie</namePart>
<namePart type="family">Sheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun</namePart>
<namePart type="family">Hua</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haosheng</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chuyun</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiangfeng</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Reinforcement learning (RL) remains fundamentally limited by poor data efficiency and weak generalization. Prior episodic RL methods attempt to alleviate this via external memory modules, yet they suffer from two key limitations: a representation bottleneck caused by shallow encoders, and a retrieval dilemma where episodic memory is accessed indiscriminately. To address these challenges, we propose Agentic Episodic Control (AEC), a novel architecture that integrates large language models (LLMs) into episodic RL. AEC uses an LLM-based semantic augmenter to generate semantic representations from raw observations, and a critical state recognizer to selectively retrieve valuable experiences. This transforms memory usage from passive similarity matching into strategic, context-aware recall. Across five BabyAI-Text environments, AEC achieves 2–6× higher data efficiency than baselines and is the only method to solve complex tasks like UnlockLocal with over 90% success. It further demonstrates strong cross-task and cross-environment generalization, maintaining performance even under distribution shifts. AEC shows that combining LLM-derived priors with reinforcement learning yields more sample-efficient and adaptable agents. Code is available at https://github.com/Xidong-Yang/Agentic_Episodic_Control.</abstract>
<identifier type="citekey">yang-etal-2026-agentic</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.654/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>13355</start>
<end>13370</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Agentic Episodic Control
%A Yang, Xidong
%A Li, Wenhao
%A Sheng, Junjie
%A Hua, Yun
%A Chen, Haosheng
%A Shen, Chuyun
%A Wang, Xiangfeng
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F yang-etal-2026-agentic
%X Reinforcement learning (RL) remains fundamentally limited by poor data efficiency and weak generalization. Prior episodic RL methods attempt to alleviate this via external memory modules, yet they suffer from two key limitations: a representation bottleneck caused by shallow encoders, and a retrieval dilemma where episodic memory is accessed indiscriminately. To address these challenges, we propose Agentic Episodic Control (AEC), a novel architecture that integrates large language models (LLMs) into episodic RL. AEC uses an LLM-based semantic augmenter to generate semantic representations from raw observations, and a critical state recognizer to selectively retrieve valuable experiences. This transforms memory usage from passive similarity matching into strategic, context-aware recall. Across five BabyAI-Text environments, AEC achieves 2–6× higher data efficiency than baselines and is the only method to solve complex tasks like UnlockLocal with over 90% success. It further demonstrates strong cross-task and cross-environment generalization, maintaining performance even under distribution shifts. AEC shows that combining LLM-derived priors with reinforcement learning yields more sample-efficient and adaptable agents. Code is available at https://github.com/Xidong-Yang/Agentic_Episodic_Control.
%U https://aclanthology.org/2026.findings-acl.654/
%P 13355-13370
Markdown (Informal)
[Agentic Episodic Control](https://aclanthology.org/2026.findings-acl.654/) (Yang et al., Findings 2026)
ACL
- Xidong Yang, Wenhao Li, Junjie Sheng, Yun Hua, Haosheng Chen, Chuyun Shen, and Xiangfeng Wang. 2026. Agentic Episodic Control. In Findings of the Association for Computational Linguistics: ACL 2026, pages 13355–13370, San Diego, California, United States. Association for Computational Linguistics.