@inproceedings{qiu-etal-2026-tree,
title = "Tree-Notebook: A Context-Aware Agent with Tree Search and Entropy-Aware Data Shadow for Interactive Data Science",
author = "Qiu, Junkun and
Huang, Min and
Miao, Qinghai",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.783/",
pages = "15957--15970",
ISBN = "979-8-89176-395-1",
abstract = "While LLM-based agents have emerged as a focal point for automating data science tasks, they continue to grapple with inefficient context management, ``silent failures'' (where code executes correctly but fails the task objectives), and error propagation inherent in sequential generation. In this paper, we propose Tree-Notebook, an agentic framework designed to mimic the iterative cognitive process of human data scientists. At its core, Tree-Notebook conceptualizes Jupyter Notebook cells as nodes within a tree structure, facilitating organized and efficient context retrieval. We formalize the task-solving process as a Partially Observable Markov Decision Process (POMDP) over a dynamic tree, utilizing an entropy-based information gain function for path evaluation to enhance adaptability in real-world environments. Furthermore, we introduce the ``Data Shadow'' system, which resolves silent failures by performing real-time tracking of data distributions, provenance, and semantic constraints. Experimental results demonstrate that Tree-Notebook achieves state-of-the-art (SOTA) performance on both InfiAgent-DABench and DSBench. To further evaluate robustness, we introduce an augmented version of InfiAgent-DABench to simulate complex environments, where Tree-Notebook consistently maintains its SOTA standing. Code is available at: https://github.com/QJK-BUAA/Tree-Notebook"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="qiu-etal-2026-tree">
<titleInfo>
<title>Tree-Notebook: A Context-Aware Agent with Tree Search and Entropy-Aware Data Shadow for Interactive Data Science</title>
</titleInfo>
<name type="personal">
<namePart type="given">Junkun</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qinghai</namePart>
<namePart type="family">Miao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>While LLM-based agents have emerged as a focal point for automating data science tasks, they continue to grapple with inefficient context management, “silent failures” (where code executes correctly but fails the task objectives), and error propagation inherent in sequential generation. In this paper, we propose Tree-Notebook, an agentic framework designed to mimic the iterative cognitive process of human data scientists. At its core, Tree-Notebook conceptualizes Jupyter Notebook cells as nodes within a tree structure, facilitating organized and efficient context retrieval. We formalize the task-solving process as a Partially Observable Markov Decision Process (POMDP) over a dynamic tree, utilizing an entropy-based information gain function for path evaluation to enhance adaptability in real-world environments. Furthermore, we introduce the “Data Shadow” system, which resolves silent failures by performing real-time tracking of data distributions, provenance, and semantic constraints. Experimental results demonstrate that Tree-Notebook achieves state-of-the-art (SOTA) performance on both InfiAgent-DABench and DSBench. To further evaluate robustness, we introduce an augmented version of InfiAgent-DABench to simulate complex environments, where Tree-Notebook consistently maintains its SOTA standing. Code is available at: https://github.com/QJK-BUAA/Tree-Notebook</abstract>
<identifier type="citekey">qiu-etal-2026-tree</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.783/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>15957</start>
<end>15970</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tree-Notebook: A Context-Aware Agent with Tree Search and Entropy-Aware Data Shadow for Interactive Data Science
%A Qiu, Junkun
%A Huang, Min
%A Miao, Qinghai
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F qiu-etal-2026-tree
%X While LLM-based agents have emerged as a focal point for automating data science tasks, they continue to grapple with inefficient context management, “silent failures” (where code executes correctly but fails the task objectives), and error propagation inherent in sequential generation. In this paper, we propose Tree-Notebook, an agentic framework designed to mimic the iterative cognitive process of human data scientists. At its core, Tree-Notebook conceptualizes Jupyter Notebook cells as nodes within a tree structure, facilitating organized and efficient context retrieval. We formalize the task-solving process as a Partially Observable Markov Decision Process (POMDP) over a dynamic tree, utilizing an entropy-based information gain function for path evaluation to enhance adaptability in real-world environments. Furthermore, we introduce the “Data Shadow” system, which resolves silent failures by performing real-time tracking of data distributions, provenance, and semantic constraints. Experimental results demonstrate that Tree-Notebook achieves state-of-the-art (SOTA) performance on both InfiAgent-DABench and DSBench. To further evaluate robustness, we introduce an augmented version of InfiAgent-DABench to simulate complex environments, where Tree-Notebook consistently maintains its SOTA standing. Code is available at: https://github.com/QJK-BUAA/Tree-Notebook
%U https://aclanthology.org/2026.findings-acl.783/
%P 15957-15970
Markdown (Informal)
[Tree-Notebook: A Context-Aware Agent with Tree Search and Entropy-Aware Data Shadow for Interactive Data Science](https://aclanthology.org/2026.findings-acl.783/) (Qiu et al., Findings 2026)
ACL