% BibTeX record for a workshop paper hosted on the ACL Anthology (see `url`).
% Braced words inside `title` / `booktitle` are protected from style recasing.
% `month` uses the predefined unquoted macro so styles can localise it.
@inproceedings{wang-etal-2026-dsmentor,
  author    = {Wang, He and
               Li, Alexander Hanbo and
               Hu, Yiqun and
               Zhang, Sheng and
               Kobayashi, Hideo and
               Zhang, Jiani and
               Zhu, Henghui and
               Hang, Chung-Wei and
               Ng, Patrick},
  title     = {{DSMentor}: Curriculum-Guided Inference with Online Memory for Data-Science {LLM} Agents},
  editor    = {Gupta, Vivek and
               Ding, Kaize and
               Kokel, Harsha and
               Zhao, Yue and
               Agarwal, Amit and
               Wang, Yu and
               Glass, Michael and
               Zhang, Yu and
               Srinivas, Kavitha and
               Chen, Xiusi and
               Hassanzadeh, Oktie and
               Zhu, Qi and
               Chang, Shuaichen and
               Luo, Yuan},
  booktitle = {Proceedings of the First Workshop on Structured Understanding, Retrieval, and Generation in the {LLM} Era ({SURGeLLM} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  pages     = {190--208},
  isbn      = {979-8-89176-406-4},
  url       = {https://aclanthology.org/2026.surgellm-1.12/},
  abstract  = {Large language model (LLM) agents have shown strong capabilities in generating code to solve complex data science problems, yet they often overlook the impact of task order during inference. We present DSMentor, an inference-time optimization framework that applies curriculum learning{---}progressing from easier to harder tasks{---}to enhance LLM performance on challenging data science tasks. Guided by a mentor and supported by a growing long-term memory, DSMentor organizes problems by difficulty, retains prior experiences, and leverages them to guide subsequent reasoning. Extensive experiments on DSEval and QRData benchmarks show that DSMentor with Claude-3.5-Sonnet improves pass rates by up to 5.2{\%} over baseline agents and achieves an 8.8{\%} gain over GPT-4 with Program-of-Thoughts prompting. These results highlight the effectiveness of curriculum-based inference strategies in advancing LLM agents.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wang-etal-2026-dsmentor">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>DSMentor: Curriculum-Guided Inference with Online Memory for Data-Science LLM Agents</title>
    </titleInfo>

    <!-- Paper authors (one <name> per person, role term "author"). -->
    <name type="personal">
      <namePart type="given">He</namePart>
      <namePart type="family">Wang</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Alexander</namePart>
      <namePart type="given">Hanbo</namePart>
      <namePart type="family">Li</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Yiqun</namePart>
      <namePart type="family">Hu</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Sheng</namePart>
      <namePart type="family">Zhang</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Hideo</namePart>
      <namePart type="family">Kobayashi</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Jiani</namePart>
      <namePart type="family">Zhang</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Henghui</namePart>
      <namePart type="family">Zhu</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Chung-Wei</namePart>
      <namePart type="family">Hang</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Patrick</namePart>
      <namePart type="family">Ng</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>

    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First Workshop on Structured Understanding, Retrieval, and Generation in the LLM Era (SURGeLLM 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Vivek</namePart>
        <namePart type="family">Gupta</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Kaize</namePart>
        <namePart type="family">Ding</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Harsha</namePart>
        <namePart type="family">Kokel</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Yue</namePart>
        <namePart type="family">Zhao</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Amit</namePart>
        <namePart type="family">Agarwal</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Yu</namePart>
        <namePart type="family">Wang</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Michael</namePart>
        <namePart type="family">Glass</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Yu</namePart>
        <namePart type="family">Zhang</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Kavitha</namePart>
        <namePart type="family">Srinivas</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Xiusi</namePart>
        <namePart type="family">Chen</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Oktie</namePart>
        <namePart type="family">Hassanzadeh</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Qi</namePart>
        <namePart type="family">Zhu</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Shuaichen</namePart>
        <namePart type="family">Chang</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Yuan</namePart>
        <namePart type="family">Luo</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-406-4</identifier>
    </relatedItem>

    <abstract>Large language model (LLM) agents have shown strong capabilities in generating code to solve complex data science problems, yet they often overlook the impact of task order during inference. We present DSMentor, an inference-time optimization framework that applies curriculum learning—progressing from easier to harder tasks—to enhance LLM performance on challenging data science tasks. Guided by a mentor and supported by a growing long-term memory, DSMentor organizes problems by difficulty, retains prior experiences, and leverages them to guide subsequent reasoning. Extensive experiments on DSEval and QRData benchmarks show that DSMentor with Claude-3.5-Sonnet improves pass rates by up to 5.2% over baseline agents and achieves an 8.8% gain over GPT-4 with Program-of-Thoughts prompting. These results highlight the effectiveness of curriculum-based inference strategies in advancing LLM agents.</abstract>

    <!-- Citation key (same value as the mods ID attribute) and landing-page URL. -->
    <identifier type="citekey">wang-etal-2026-dsmentor</identifier>
    <location>
      <url>https://aclanthology.org/2026.surgellm-1.12/</url>
    </location>

    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>190</start>
        <end>208</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T DSMentor: Curriculum-Guided Inference with Online Memory for Data-Science LLM Agents
%A Wang, He
%A Li, Alexander Hanbo
%A Hu, Yiqun
%A Zhang, Sheng
%A Kobayashi, Hideo
%A Zhang, Jiani
%A Zhu, Henghui
%A Hang, Chung-Wei
%A Ng, Patrick
%Y Gupta, Vivek
%Y Ding, Kaize
%Y Kokel, Harsha
%Y Zhao, Yue
%Y Agarwal, Amit
%Y Wang, Yu
%Y Glass, Michael
%Y Zhang, Yu
%Y Srinivas, Kavitha
%Y Chen, Xiusi
%Y Hassanzadeh, Oktie
%Y Zhu, Qi
%Y Chang, Shuaichen
%Y Luo, Yuan
%S Proceedings of the First Workshop on Structured Understanding, Retrieval, and Generation in the LLM Era (SURGeLLM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-406-4
%F wang-etal-2026-dsmentor
%X Large language model (LLM) agents have shown strong capabilities in generating code to solve complex data science problems, yet they often overlook the impact of task order during inference. We present DSMentor, an inference-time optimization framework that applies curriculum learning—progressing from easier to harder tasks—to enhance LLM performance on challenging data science tasks. Guided by a mentor and supported by a growing long-term memory, DSMentor organizes problems by difficulty, retains prior experiences, and leverages them to guide subsequent reasoning. Extensive experiments on DSEval and QRData benchmarks show that DSMentor with Claude-3.5-Sonnet improves pass rates by up to 5.2% over baseline agents and achieves an 8.8% gain over GPT-4 with Program-of-Thoughts prompting. These results highlight the effectiveness of curriculum-based inference strategies in advancing LLM agents.
%U https://aclanthology.org/2026.surgellm-1.12/
%P 190-208
Markdown (Informal)
[DSMentor: Curriculum-Guided Inference with Online Memory for Data-Science LLM Agents](https://aclanthology.org/2026.surgellm-1.12/) (Wang et al., SURGeLLM 2026)
ACL
- He Wang, Alexander Hanbo Li, Yiqun Hu, Sheng Zhang, Hideo Kobayashi, Jiani Zhang, Henghui Zhu, Chung-Wei Hang, and Patrick Ng. 2026. DSMentor: Curriculum-Guided Inference with Online Memory for Data-Science LLM Agents. In Proceedings of the First Workshop on Structured Understanding, Retrieval, and Generation in the LLM Era (SURGeLLM 2026), pages 190–208, San Diego, California, United States. Association for Computational Linguistics.