@comment{
  Conference paper published in the Findings of ACL 2026 proceedings.
  Field values are brace-delimited, one field per line. Acronyms and proper
  nouns in booktitle are protected as whole words so that sentence-casing
  styles keep their capitalisation.
  NOTE(review): address appears to hold the conference venue rather than the
  publisher's city; confirm against the target bibliography style.
}
@inproceedings{zhao-etal-2026-demystify,
  title     = {Demystify the Role of Memory in Machine Learning Engineering Agents},
  author    = {Zhao, Xinyu and
               Wang, Junpeng and
               Chen, Yuzhong and
               Pan, Menghai and
               Yeh, Chin-Chia Michael and
               Sun, Jiarui and
               Zheng, Yan and
               Das, Mahashweta and
               Chen, Tianlong},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.525/},
  pages     = {10811--10826},
  isbn      = {979-8-89176-395-1},
  abstract  = {While memory is a core component in agent systems, its behavioral impact in complex, long-horizon domains like machine learning engineering (MLE) remains poorly understood. Unlike short, reactive exchanges, MLE agents solve tasks through cycles of experimentation and improvement where past errors can inform future success. This paper presents a systematic study dissecting how memory influences agent behavior and performance across diverse MLE challenges. We first introduce a dynamic coding memory designed to capture and reuse debugging experiences, and integrate it into two representative agent paradigms: a sequential, chain-based agent that mirrors human-like iterative refinement, and a parallel, tree-based agent that performs broad, self-exploratory search in the code space. Our central finding is that the role of memory is contingent on the agent{'}s underlying architecture. For chain-based agents, memory proves highly beneficial, enabling them to avoid recurring mistakes and engage in more coherent, iterative refinement, which significantly improves reliability and task success. In contrast, for tree-based search agents, memory introduces a critical trade-off: it enhances procedural stability at the cost of constraining search diversity, which can prematurely narrow exploration and lead to suboptimal final solutions. These findings reveal a fundamental trade-off between procedural reliability and solution innovation modulated by memory, offering insights for designing more effective and robust MLE agents.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!--
    MODS v3 record for the paper identified by citekey zhao-etal-2026-demystify.
    Layout: paper title, one <name> per author (in order), issue date, then the
    host proceedings as a <relatedItem type="host"> carrying its own title,
    editors, publisher, place and ISBN, followed by abstract, URL and page range.
    The editor's middle initial keeps its period ("P.") so it agrees with the
    other citation formats of this same record.
  -->
  <mods ID="zhao-etal-2026-demystify">
    <titleInfo>
      <title>Demystify the Role of Memory in Machine Learning Engineering Agents</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Xinyu</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Junpeng</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yuzhong</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Menghai</namePart>
      <namePart type="family">Pan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chin-Chia</namePart>
      <namePart type="given">Michael</namePart>
      <namePart type="family">Yeh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jiarui</namePart>
      <namePart type="family">Sun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yan</namePart>
      <namePart type="family">Zheng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mahashweta</namePart>
      <namePart type="family">Das</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tianlong</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P.</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>
    <abstract>While memory is a core component in agent systems, its behavioral impact in complex, long-horizon domains like machine learning engineering (MLE) remains poorly understood. Unlike short, reactive exchanges, MLE agents solve tasks through cycles of experimentation and improvement where past errors can inform future success. This paper presents a systematic study dissecting how memory influences agent behavior and performance across diverse MLE challenges. We first introduce a dynamic coding memory designed to capture and reuse debugging experiences, and integrate it into two representative agent paradigms: a sequential, chain-based agent that mirrors human-like iterative refinement, and a parallel, tree-based agent that performs broad, self-exploratory search in the code space. Our central finding is that the role of memory is contingent on the agent’s underlying architecture. For chain-based agents, memory proves highly beneficial, enabling them to avoid recurring mistakes and engage in more coherent, iterative refinement, which significantly improves reliability and task success. In contrast, for tree-based search agents, memory introduces a critical trade-off: it enhances procedural stability at the cost of constraining search diversity, which can prematurely narrow exploration and lead to suboptimal final solutions. These findings reveal a fundamental trade-off between procedural reliability and solution innovation modulated by memory, offering insights for designing more effective and robust MLE agents.</abstract>
    <identifier type="citekey">zhao-etal-2026-demystify</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.525/</url>
    </location>
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>10811</start>
        <end>10826</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Demystify the Role of Memory in Machine Learning Engineering Agents
%A Zhao, Xinyu
%A Wang, Junpeng
%A Chen, Yuzhong
%A Pan, Menghai
%A Yeh, Chin-Chia Michael
%A Sun, Jiarui
%A Zheng, Yan
%A Das, Mahashweta
%A Chen, Tianlong
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F zhao-etal-2026-demystify
%X While memory is a core component in agent systems, its behavioral impact in complex, long-horizon domains like machine learning engineering (MLE) remains poorly understood. Unlike short, reactive exchanges, MLE agents solve tasks through cycles of experimentation and improvement where past errors can inform future success. This paper presents a systematic study dissecting how memory influences agent behavior and performance across diverse MLE challenges. We first introduce a dynamic coding memory designed to capture and reuse debugging experiences, and integrate it into two representative agent paradigms: a sequential, chain-based agent that mirrors human-like iterative refinement, and a parallel, tree-based agent that performs broad, self-exploratory search in the code space. Our central finding is that the role of memory is contingent on the agent’s underlying architecture. For chain-based agents, memory proves highly beneficial, enabling them to avoid recurring mistakes and engage in more coherent, iterative refinement, which significantly improves reliability and task success. In contrast, for tree-based search agents, memory introduces a critical trade-off: it enhances procedural stability at the cost of constraining search diversity, which can prematurely narrow exploration and lead to suboptimal final solutions. These findings reveal a fundamental trade-off between procedural reliability and solution innovation modulated by memory, offering insights for designing more effective and robust MLE agents.
%U https://aclanthology.org/2026.findings-acl.525/
%P 10811-10826
Markdown (Informal)
[Demystify the Role of Memory in Machine Learning Engineering Agents](https://aclanthology.org/2026.findings-acl.525/) (Zhao et al., Findings 2026)
ACL
- Xinyu Zhao, Junpeng Wang, Yuzhong Chen, Menghai Pan, Chin-Chia Michael Yeh, Jiarui Sun, Yan Zheng, Mahashweta Das, and Tianlong Chen. 2026. Demystify the Role of Memory in Machine Learning Engineering Agents. In Findings of the Association for Computational Linguistics: ACL 2026, pages 10811–10826, San Diego, California, United States. Association for Computational Linguistics.