@inproceedings{zhou-etal-2025-reso,
title = "{R}e{S}o: A Reward-driven Self-organizing {LLM}-based Multi-Agent System for Reasoning Tasks",
author = "Zhou, Heng and
Geng, Hejia and
Xue, Xiangyuan and
Kang, Li and
Qin, Yiran and
Wang, Zhiyong and
Yin, Zhenfei and
Bai, Lei",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.808/",
doi = "10.18653/v1/2025.emnlp-main.808",
pages = "15979--15998",
ISBN = "979-8-89176-332-6",
abstract = "Multi-agent systems have emerged as a promising approach for enhancing the reasoning capabilities of large language models in complex problem-solving. However, current MAS frameworks are limited by poor flexibility and scalability, with underdeveloped optimization strategies. To address these challenges, we propose ReSo, which integrates task graph generation with a reward-driven two-stage agent selection process. The core of ReSo is the proposed Collaborative Reward Model, which can provide fine-grained reward signals for MAS cooperation for optimization. We also introduce an automated data synthesis framework for generating MAS benchmarks, without human annotations. Experimentally, ReSo matches or outperforms existing methods. ReSo achieves $\textbf{33.7\%}$ and $\textbf{32.3\%}$ accuracy on Math-MAS and SciBench-MAS, respectively, while other methods completely fail. The code and data are available at [ReSo](https://github.com/hengzzzhou/ReSo)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhou-etal-2025-reso">
<titleInfo>
<title>ReSo: A Reward-driven Self-organizing LLM-based Multi-Agent System for Reasoning Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hejia</namePart>
<namePart type="family">Geng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiangyuan</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Li</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yiran</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiyong</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenfei</namePart>
<namePart type="family">Yin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lei</namePart>
<namePart type="family">Bai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>Multi-agent systems have emerged as a promising approach for enhancing the reasoning capabilities of large language models in complex problem-solving. However, current MAS frameworks are limited by poor flexibility and scalability, with underdeveloped optimization strategies. To address these challenges, we propose ReSo, which integrates task graph generation with a reward-driven two-stage agent selection process. The core of ReSo is the proposed Collaborative Reward Model, which can provide fine-grained reward signals for MAS cooperation for optimization. We also introduce an automated data synthesis framework for generating MAS benchmarks, without human annotations. Experimentally, ReSo matches or outperforms existing methods. ReSo achieves 33.7% and 32.3% accuracy on Math-MAS and SciBench-MAS, respectively, while other methods completely fail. The code and data are available at [ReSo](https://github.com/hengzzzhou/ReSo).</abstract>
<identifier type="citekey">zhou-etal-2025-reso</identifier>
<identifier type="doi">10.18653/v1/2025.emnlp-main.808</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.808/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>15979</start>
<end>15998</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ReSo: A Reward-driven Self-organizing LLM-based Multi-Agent System for Reasoning Tasks
%A Zhou, Heng
%A Geng, Hejia
%A Xue, Xiangyuan
%A Kang, Li
%A Qin, Yiran
%A Wang, Zhiyong
%A Yin, Zhenfei
%A Bai, Lei
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F zhou-etal-2025-reso
%X Multi-agent systems have emerged as a promising approach for enhancing the reasoning capabilities of large language models in complex problem-solving. However, current MAS frameworks are limited by poor flexibility and scalability, with underdeveloped optimization strategies. To address these challenges, we propose ReSo, which integrates task graph generation with a reward-driven two-stage agent selection process. The core of ReSo is the proposed Collaborative Reward Model, which can provide fine-grained reward signals for MAS cooperation for optimization. We also introduce an automated data synthesis framework for generating MAS benchmarks, without human annotations. Experimentally, ReSo matches or outperforms existing methods. ReSo achieves 33.7% and 32.3% accuracy on Math-MAS and SciBench-MAS, respectively, while other methods completely fail. The code and data are available at [ReSo](https://github.com/hengzzzhou/ReSo).
%R 10.18653/v1/2025.emnlp-main.808
%U https://aclanthology.org/2025.emnlp-main.808/
%U https://doi.org/10.18653/v1/2025.emnlp-main.808
%P 15979-15998
Markdown (Informal)
[ReSo: A Reward-driven Self-organizing LLM-based Multi-Agent System for Reasoning Tasks](https://aclanthology.org/2025.emnlp-main.808/) (Zhou et al., EMNLP 2025)
ACL
- Heng Zhou, Hejia Geng, Xiangyuan Xue, Li Kang, Yiran Qin, Zhiyong Wang, Zhenfei Yin, and Lei Bai. 2025. ReSo: A Reward-driven Self-organizing LLM-based Multi-Agent System for Reasoning Tasks. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, pages 15979–15998, Suzhou, China. Association for Computational Linguistics.