@inproceedings{deng-etal-2025-agentpro,
title = "{A}gent{P}ro: Enhancing {LLM} Agents with Automated Process Supervision",
author = "Deng, Yuchen and
Fan, Shichen and
Wang, Naibo and
Zhao, Xinkui and
Ng, See-Kiong",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.506/",
pages = "9992--10017",
ISBN = "979-8-89176-332-6",
abstract = "Large language model (LLM) agents have demonstrated significant potential for addressing complex tasks through mechanisms such as chain-of-thought reasoning and tool invocation. However, current frameworks lack explicit supervision during the reasoning process, which may lead to error propagation across reasoning chains and hinder the optimization of intermediate decision-making stages. This paper introduces a novel framework, AgentPro, which enhances LLM agent performance by automated process supervision. AgentPro employs Monte Carlo Tree Search to automatically generate step-level annotations, and develops a process reward model based on these annotations to facilitate fine-grained quality assessment of reasoning. By employing a rejection sampling strategy, the LLM agent dynamically adjusts generation probability distributions to prevent the continuation of erroneous paths, thereby improving reasoning capabilities. Extensive experiments on four datasets indicate that our method significantly outperforms existing agent-based LLM methods (e.g., achieving a 6.32{\%} increase in accuracy on the HotpotQA dataset), underscoring its proficiency in managing intricate reasoning chains."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="deng-etal-2025-agentpro">
<titleInfo>
<title>AgentPro: Enhancing LLM Agents with Automated Process Supervision</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuchen</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shichen</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naibo</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinkui</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">See-Kiong</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>Large language model (LLM) agents have demonstrated significant potential for addressing complex tasks through mechanisms such as chain-of-thought reasoning and tool invocation. However, current frameworks lack explicit supervision during the reasoning process, which may lead to error propagation across reasoning chains and hinder the optimization of intermediate decision-making stages. This paper introduces a novel framework, AgentPro, which enhances LLM agent performance by automated process supervision. AgentPro employs Monte Carlo Tree Search to automatically generate step-level annotations, and develops a process reward model based on these annotations to facilitate fine-grained quality assessment of reasoning. By employing a rejection sampling strategy, the LLM agent dynamically adjusts generation probability distributions to prevent the continuation of erroneous paths, thereby improving reasoning capabilities. Extensive experiments on four datasets indicate that our method significantly outperforms existing agent-based LLM methods (e.g., achieving a 6.32% increase in accuracy on the HotpotQA dataset), underscoring its proficiency in managing intricate reasoning chains.</abstract>
<identifier type="citekey">deng-etal-2025-agentpro</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.506/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>9992</start>
<end>10017</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AgentPro: Enhancing LLM Agents with Automated Process Supervision
%A Deng, Yuchen
%A Fan, Shichen
%A Wang, Naibo
%A Zhao, Xinkui
%A Ng, See-Kiong
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F deng-etal-2025-agentpro
%X Large language model (LLM) agents have demonstrated significant potential for addressing complex tasks through mechanisms such as chain-of-thought reasoning and tool invocation. However, current frameworks lack explicit supervision during the reasoning process, which may lead to error propagation across reasoning chains and hinder the optimization of intermediate decision-making stages. This paper introduces a novel framework, AgentPro, which enhances LLM agent performance by automated process supervision. AgentPro employs Monte Carlo Tree Search to automatically generate step-level annotations, and develops a process reward model based on these annotations to facilitate fine-grained quality assessment of reasoning. By employing a rejection sampling strategy, the LLM agent dynamically adjusts generation probability distributions to prevent the continuation of erroneous paths, thereby improving reasoning capabilities. Extensive experiments on four datasets indicate that our method significantly outperforms existing agent-based LLM methods (e.g., achieving a 6.32% increase in accuracy on the HotpotQA dataset), underscoring its proficiency in managing intricate reasoning chains.
%U https://aclanthology.org/2025.emnlp-main.506/
%P 9992-10017
Markdown (Informal)
[AgentPro: Enhancing LLM Agents with Automated Process Supervision](https://aclanthology.org/2025.emnlp-main.506/) (Deng et al., EMNLP 2025)
ACL