% Conference-paper record, citation key fan-etal-2026-nash.
% Case-sensitive title words (Nash, CredMAS, VLM, MAS) are brace-protected as
% whole words so that sentence-casing bibliography styles keep their capitals.
@inproceedings{fan-etal-2026-nash,
    title = "{Nash}-Pruned {CredMAS}: Dynamic Panel Pruning for {VLM}-{MAS} using {Nash}-based Selection and Doubly-Robust Credits",
    author = "Fan, Yijia and
      Liu, Mingyu and
      Yang, Jing and
      Wang, Jian and
      Wang, Keze and
      Zhang, Jusheng",
    editor = "Liakata, Maria and
      Moreira, Viviane P. and
      Zhang, Jiajun and
      Jurgens, David",
    booktitle = "Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.findings-acl.1975/",
    pages = "39640--39650",
    isbn = "979-8-89176-395-1",
    abstract = "Multi-round Vision-Language Model (VLM) Multi-Agent Systems (MAS) offer powerful reasoning capabilities but suffer from prohibitive costs due to static panel designs, where all $N$ agents communicate at every $T$ round. This approach is fundamentally inefficient, as it ignores the \textit{context-dependent} and \textit{diminishing} marginal utility of specific agents. To address this, we propose \textbf{Nash-CredMAS}, an economic framework that transforms agent selection into a \textbf{dynamic resource allocation game}. Unlike heuristic routing or one-time pruning, our method operates in two phases: (1) \textbf{Offline Causal Value Learning}, where we employ a \textbf{doubly-robust (AIPW) estimator} to train a context-aware value function from biased interaction logs, effectively learning the true marginal contribution of agents; and (2) \textbf{Online Dynamic Auctions}, where agents bid for communication slots based on their predicted utility. We formulate the inference-time selection as a \textbf{submodular maximization problem} under budget constraints, theoretically guaranteeing a $(1 - 1/e)$-approximation of the optimal coalition via a greedy strategy. Empirically, Nash-CredMAS achieves state-of-the-art results on challenging benchmarks, including MMMU and V*-Bench, while reducing token consumption by over 25{\%} compared to static baselines. The system naturally converges to an economic equilibrium where agents actively remain silent when their marginal value does not justify the cost."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for the paper with citekey fan-etal-2026-nash. -->
  <mods ID="fan-etal-2026-nash">
    <titleInfo>
      <title>Nash-Pruned CredMAS: Dynamic Panel Pruning for VLM-MAS using Nash-based Selection and Doubly-Robust Credits</title>
    </titleInfo>
    <!-- Paper authors, one name element per person, in listed order. -->
    <name type="personal">
      <namePart type="given">Yijia</namePart>
      <namePart type="family">Fan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mingyu</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jing</namePart>
      <namePart type="family">Yang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jian</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Keze</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jusheng</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>
    <!-- Plain-text abstract (no LaTeX markup). -->
    <abstract>Multi-round Vision-Language Model (VLM) Multi-Agent Systems (MAS) offer powerful reasoning capabilities but suffer from prohibitive costs due to static panel designs, where all N agents communicate at every T round. This approach is fundamentally inefficient, as it ignores the context-dependent and diminishing marginal utility of specific agents. To address this, we propose Nash-CredMAS, an economic framework that transforms agent selection into a dynamic resource allocation game. Unlike heuristic routing or one-time pruning, our method operates in two phases: (1) Offline Causal Value Learning, where we employ a doubly-robust (AIPW) estimator to train a context-aware value function from biased interaction logs, effectively learning the true marginal contribution of agents; and (2) Online Dynamic Auctions, where agents bid for communication slots based on their predicted utility. We formulate the inference-time selection as a submodular maximization problem under budget constraints, theoretically guaranteeing a (1 - 1/e)-approximation of the optimal coalition via a greedy strategy. Empirically, Nash-CredMAS achieves state-of-the-art results on challenging benchmarks, including MMMU and V*-Bench, while reducing token consumption by over 25% compared to static baselines. The system naturally converges to an economic equilibrium where agents actively remain silent when their marginal value does not justify the cost.</abstract>
    <identifier type="citekey">fan-etal-2026-nash</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.1975/</url>
    </location>
    <!-- Issue date and page range of the paper. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>39640</start>
        <end>39650</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Nash-Pruned CredMAS: Dynamic Panel Pruning for VLM-MAS using Nash-based Selection and Doubly-Robust Credits
%A Fan, Yijia
%A Liu, Mingyu
%A Yang, Jing
%A Wang, Jian
%A Wang, Keze
%A Zhang, Jusheng
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F fan-etal-2026-nash
%X Multi-round Vision-Language Model (VLM) Multi-Agent Systems (MAS) offer powerful reasoning capabilities but suffer from prohibitive costs due to static panel designs, where all N agents communicate at every T round. This approach is fundamentally inefficient, as it ignores the context-dependent and diminishing marginal utility of specific agents. To address this, we propose Nash-CredMAS, an economic framework that transforms agent selection into a dynamic resource allocation game. Unlike heuristic routing or one-time pruning, our method operates in two phases: (1) Offline Causal Value Learning, where we employ a doubly-robust (AIPW) estimator to train a context-aware value function from biased interaction logs, effectively learning the true marginal contribution of agents; and (2) Online Dynamic Auctions, where agents bid for communication slots based on their predicted utility. We formulate the inference-time selection as a submodular maximization problem under budget constraints, theoretically guaranteeing a (1 - 1/e)-approximation of the optimal coalition via a greedy strategy. Empirically, Nash-CredMAS achieves state-of-the-art results on challenging benchmarks, including MMMU and V*-Bench, while reducing token consumption by over 25% compared to static baselines. The system naturally converges to an economic equilibrium where agents actively remain silent when their marginal value does not justify the cost.
%U https://aclanthology.org/2026.findings-acl.1975/
%P 39640-39650
Markdown (Informal)
[Nash-Pruned CredMAS: Dynamic Panel Pruning for VLM-MAS using Nash-based Selection and Doubly-Robust Credits](https://aclanthology.org/2026.findings-acl.1975/) (Fan et al., Findings 2026)
ACL