% BibTeX record for the SEARL paper (ACL 2026, Volume 1: Long Papers).
@inproceedings{feng-etal-2026-searl,
  title     = {{SEARL}: Joint Optimization of Policy and Tool Graph Memory for Self-Evolving Agents},
  author    = {Feng, Xinshun and
               Song, Xinhao and
               Li, Lijun and
               Liu, Gongshen and
               Shao, Jing},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-long.1125/},
  pages     = {24518--24535},
  isbn      = {979-8-89176-390-6},
  abstract  = {Recent advances in Reinforcement Learning with Verifiable Rewards (RLVR) have demonstrated significant potential in single-turn reasoning tasks. With the paradigm shift toward self-evolving agentic learning, models are increasingly expected to learn from trajectories by synthesizing tools or accumulating explicit experiences. However, prevailing methods typically rely on large-scale LLMs or multi-agent frameworks, which hinder their deployment in resource-constrained environments. The inherent sparsity of outcome-based rewards also poses a substantial challenge, as agents typically receive feedback only upon task completion. To address these limitations, we introduce a Tool-Memory based self-evolving agentic framework SEARL. Unlike approaches that directly utilize interaction experiences, our method constructs a structured experience memory that integrates planning with execution. This provides a novel form of state abstraction that facilitates the aggregation of actions within functionally analogous contexts, such as tool reuse. Consequently, agents not only extract explicit knowledge from historical data but also leverage inter-trajectory correlations to densify reward signals. We evaluate our framework on knowledge reasoning and complex search tasks, demonstrating its effectiveness in achieving more practical and efficient agentic learning.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for the paper with citekey feng-etal-2026-searl. -->
  <mods ID="feng-etal-2026-searl">
    <titleInfo>
      <title>SEARL: Joint Optimization of Policy and Tool Graph Memory for Self-Evolving Agents</title>
    </titleInfo>

    <!-- Authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Xinshun</namePart>
      <namePart type="family">Feng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xinhao</namePart>
      <namePart type="family">Song</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lijun</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Gongshen</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jing</namePart>
      <namePart type="family">Shao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-390-6</identifier>
    </relatedItem>

    <abstract>Recent advances in Reinforcement Learning with Verifiable Rewards (RLVR) have demonstrated significant potential in single-turn reasoning tasks. With the paradigm shift toward self-evolving agentic learning, models are increasingly expected to learn from trajectories by synthesizing tools or accumulating explicit experiences. However, prevailing methods typically rely on large-scale LLMs or multi-agent frameworks, which hinder their deployment in resource-constrained environments. The inherent sparsity of outcome-based rewards also poses a substantial challenge, as agents typically receive feedback only upon task completion. To address these limitations, we introduce a Tool-Memory based self-evolving agentic framework SEARL. Unlike approaches that directly utilize interaction experiences, our method constructs a structured experience memory that integrates planning with execution. This provides a novel form of state abstraction that facilitates the aggregation of actions within functionally analogous contexts, such as tool reuse. Consequently, agents not only extract explicit knowledge from historical data but also leverage inter-trajectory correlations to densify reward signals. We evaluate our framework on knowledge reasoning and complex search tasks, demonstrating its effectiveness in achieving more practical and efficient agentic learning.</abstract>
    <identifier type="citekey">feng-etal-2026-searl</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-long.1125/</url>
    </location>

    <!-- Position of the paper inside the host volume (page span). -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>24518</start>
        <end>24535</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T SEARL: Joint Optimization of Policy and Tool Graph Memory for Self-Evolving Agents
%A Feng, Xinshun
%A Song, Xinhao
%A Li, Lijun
%A Liu, Gongshen
%A Shao, Jing
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F feng-etal-2026-searl
%X Recent advances in Reinforcement Learning with Verifiable Rewards (RLVR) have demonstrated significant potential in single-turn reasoning tasks. With the paradigm shift toward self-evolving agentic learning, models are increasingly expected to learn from trajectories by synthesizing tools or accumulating explicit experiences. However, prevailing methods typically rely on large-scale LLMs or multi-agent frameworks, which hinder their deployment in resource-constrained environments. The inherent sparsity of outcome-based rewards also poses a substantial challenge, as agents typically receive feedback only upon task completion. To address these limitations, we introduce a Tool-Memory based self-evolving agentic framework SEARL. Unlike approaches that directly utilize interaction experiences, our method constructs a structured experience memory that integrates planning with execution. This provides a novel form of state abstraction that facilitates the aggregation of actions within functionally analogous contexts, such as tool reuse. Consequently, agents not only extract explicit knowledge from historical data but also leverage inter-trajectory correlations to densify reward signals. We evaluate our framework on knowledge reasoning and complex search tasks, demonstrating its effectiveness in achieving more practical and efficient agentic learning.
%U https://aclanthology.org/2026.acl-long.1125/
%P 24518-24535
Markdown (Informal)
[SEARL: Joint Optimization of Policy and Tool Graph Memory for Self-Evolving Agents](https://aclanthology.org/2026.acl-long.1125/) (Feng et al., ACL 2026)
ACL