@inproceedings{liu-etal-2026-feasible,
title = "Feasible is Not Enough: Cost-Aware Optimal Tool-Chain Planning on Multi-Solution Tool Graphs",
author = "Liu, Liangliang and
Li, Yanming and
Liu, Yigang and
Han, Jialong and
Shen, Rujia and
Guan, Yi and
Lin, Yi and
Jiang, Jingchi",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.860/",
pages = "17387--17403",
ISBN = "979-8-89176-395-1",
abstract = "Tool graphs (TG) model dependencies among tools and resources, enabling more structured organization and management of large toolsets. However, existing methods and benchmarks often formulate tool learning (TL) as a single-solution setting, overlooking the fact that many tasks admit multiple valid tool combinations and therefore require optimal solution selection. Moreover, exploring large-scale TG is computationally expensive, especially under constrained context budgets. To address these challenges, we propose TOPT, an efficient framework for learning optimal TL policies over large TG, as well as construct MultiSoTLBench, a large-scale Multi-Solution TL Benchmark, where each task admits multiple valid solutions. Specifically, to improve search efficiency in large action spaces, TOPT adopts a progressive graph expansion strategy: we train a reinforcement learning (RL) agent to acquire transferable expansion skills and construct, on demand, a compact solvable subgraph that preserves only task-relevant links. This reduces the size of the candidate space and the context usage from the outset. To enable optimal selection, we further propose a progressive graph reasoning framework. It performs RL-driven optimality analysis and scheduling on the expanded subgraph to generate an optimal tool chain that balances path length and tool cost. Comprehensive experiments on MultiSoTLBench demonstrate that TOPT generalizes effectively, improving task success and solution optimality by 46.21{\%} and 66.34{\%}, respectively."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2026-feasible">
<titleInfo>
<title>Feasible is Not Enough: Cost-Aware Optimal Tool-Chain Planning on Multi-Solution Tool Graphs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Liangliang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanming</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yigang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jialong</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rujia</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Guan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingchi</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P.</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Tool graphs (TG) model dependencies among tools and resources, enabling more structured organization and management of large toolsets. However, existing methods and benchmarks often formulate tool learning (TL) as a single-solution setting, overlooking the fact that many tasks admit multiple valid tool combinations and therefore require optimal solution selection. Moreover, exploring large-scale TG is computationally expensive, especially under constrained context budgets. To address these challenges, we propose TOPT, an efficient framework for learning optimal TL policies over large TG, as well as construct MultiSoTLBench, a large-scale Multi-Solution TL Benchmark, where each task admits multiple valid solutions. Specifically, to improve search efficiency in large action spaces, TOPT adopts a progressive graph expansion strategy: we train a reinforcement learning (RL) agent to acquire transferable expansion skills and construct, on demand, a compact solvable subgraph that preserves only task-relevant links. This reduces the size of the candidate space and the context usage from the outset. To enable optimal selection, we further propose a progressive graph reasoning framework. It performs RL-driven optimality analysis and scheduling on the expanded subgraph to generate an optimal tool chain that balances path length and tool cost. Comprehensive experiments on MultiSoTLBench demonstrate that TOPT generalizes effectively, improving task success and solution optimality by 46.21% and 66.34%, respectively.</abstract>
<identifier type="citekey">liu-etal-2026-feasible</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.860/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>17387</start>
<end>17403</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Feasible is Not Enough: Cost-Aware Optimal Tool-Chain Planning on Multi-Solution Tool Graphs
%A Liu, Liangliang
%A Li, Yanming
%A Liu, Yigang
%A Han, Jialong
%A Shen, Rujia
%A Guan, Yi
%A Lin, Yi
%A Jiang, Jingchi
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F liu-etal-2026-feasible
%X Tool graphs (TG) model dependencies among tools and resources, enabling more structured organization and management of large toolsets. However, existing methods and benchmarks often formulate tool learning (TL) as a single-solution setting, overlooking the fact that many tasks admit multiple valid tool combinations and therefore require optimal solution selection. Moreover, exploring large-scale TG is computationally expensive, especially under constrained context budgets. To address these challenges, we propose TOPT, an efficient framework for learning optimal TL policies over large TG, as well as construct MultiSoTLBench, a large-scale Multi-Solution TL Benchmark, where each task admits multiple valid solutions. Specifically, to improve search efficiency in large action spaces, TOPT adopts a progressive graph expansion strategy: we train a reinforcement learning (RL) agent to acquire transferable expansion skills and construct, on demand, a compact solvable subgraph that preserves only task-relevant links. This reduces the size of the candidate space and the context usage from the outset. To enable optimal selection, we further propose a progressive graph reasoning framework. It performs RL-driven optimality analysis and scheduling on the expanded subgraph to generate an optimal tool chain that balances path length and tool cost. Comprehensive experiments on MultiSoTLBench demonstrate that TOPT generalizes effectively, improving task success and solution optimality by 46.21% and 66.34%, respectively.
%U https://aclanthology.org/2026.findings-acl.860/
%P 17387-17403
Markdown (Informal)
[Feasible is Not Enough: Cost-Aware Optimal Tool-Chain Planning on Multi-Solution Tool Graphs](https://aclanthology.org/2026.findings-acl.860/) (Liu et al., Findings 2026)
ACL
- Liangliang Liu, Yanming Li, Yigang Liu, Jialong Han, Rujia Shen, Yi Guan, Yi Lin, and Jingchi Jiang. 2026. Feasible is Not Enough: Cost-Aware Optimal Tool-Chain Planning on Multi-Solution Tool Graphs. In Findings of the Association for Computational Linguistics: ACL 2026, pages 17387–17403, San Diego, California, United States. Association for Computational Linguistics.