@inproceedings{zhou-etal-2026-toolgrad,
title = "{T}ool{G}rad: Efficient Tool-use Dataset Generation with Textual ``Gradients''",
author = "Zhou, Zhongyi and
Uehara, Kohei and
Zhang, Haoyu and
Zhou, Jingtao and
Gu, Lin and
Du, Ruofei and
Xu, Zheng and
Harada, Tatsuya",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.950/",
pages = "19040--19056",
ISBN = "979-8-89176-395-1",
abstract = "Prior work synthesizes tool-use LLM datasets by first generating a user query, followed by complex tool-use annotations like DFS. This inherently leads to inevitable annotation failures and low efficiency in data generation. We introduce ToolGrad, an agentic framework that inverts this paradigm. ToolGrad first constructs valid tool-use chains through an iterative process guided by textual ``gradients'', and then synthesizes corresponding user queries. This ``answer-first'' approach led to ToolGrad-500, a dataset generated with more complex tool use, lower cost, and almost 100{\%} pass rate. Experiments show that models trained on ToolGrad-500 outperform those trained on expensive baseline datasets and proprietary LLMs."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhou-etal-2026-toolgrad">
<titleInfo>
<title>ToolGrad: Efficient Tool-use Dataset Generation with Textual “Gradients”</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhongyi</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kohei</namePart>
<namePart type="family">Uehara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haoyu</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingtao</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lin</namePart>
<namePart type="family">Gu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruofei</namePart>
<namePart type="family">Du</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tatsuya</namePart>
<namePart type="family">Harada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Prior work synthesizes tool-use LLM datasets by first generating a user query, followed by complex tool-use annotations like DFS. This inherently leads to inevitable annotation failures and low efficiency in data generation. We introduce ToolGrad, an agentic framework that inverts this paradigm. ToolGrad first constructs valid tool-use chains through an iterative process guided by textual “gradients”, and then synthesizes corresponding user queries. This “answer-first” approach led to ToolGrad-500, a dataset generated with more complex tool use, lower cost, and almost 100% pass rate. Experiments show that models trained on ToolGrad-500 outperform those trained on expensive baseline datasets and proprietary LLMs.</abstract>
<identifier type="citekey">zhou-etal-2026-toolgrad</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.950/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>19040</start>
<end>19056</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ToolGrad: Efficient Tool-use Dataset Generation with Textual “Gradients”
%A Zhou, Zhongyi
%A Uehara, Kohei
%A Zhang, Haoyu
%A Zhou, Jingtao
%A Gu, Lin
%A Du, Ruofei
%A Xu, Zheng
%A Harada, Tatsuya
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F zhou-etal-2026-toolgrad
%X Prior work synthesizes tool-use LLM datasets by first generating a user query, followed by complex tool-use annotations like DFS. This inherently leads to inevitable annotation failures and low efficiency in data generation. We introduce ToolGrad, an agentic framework that inverts this paradigm. ToolGrad first constructs valid tool-use chains through an iterative process guided by textual “gradients”, and then synthesizes corresponding user queries. This “answer-first” approach led to ToolGrad-500, a dataset generated with more complex tool use, lower cost, and almost 100% pass rate. Experiments show that models trained on ToolGrad-500 outperform those trained on expensive baseline datasets and proprietary LLMs.
%U https://aclanthology.org/2026.findings-acl.950/
%P 19040-19056
Markdown (Informal)
[ToolGrad: Efficient Tool-use Dataset Generation with Textual “Gradients”](https://aclanthology.org/2026.findings-acl.950/) (Zhou et al., Findings 2026)
ACL
- Zhongyi Zhou, Kohei Uehara, Haoyu Zhang, Jingtao Zhou, Lin Gu, Ruofei Du, Zheng Xu, and Tatsuya Harada. 2026. ToolGrad: Efficient Tool-use Dataset Generation with Textual “Gradients”. In Findings of the Association for Computational Linguistics: ACL 2026, pages 19040–19056, San Diego, California, United States. Association for Computational Linguistics.