% Conference-paper record for "Reflective RAG" (Findings of ACL 2026); see the url field for the landing page.
% NOTE(review): address holds the location exactly as exported; presumably the conference venue
% rather than the publisher's city. Confirm against the target bibliography style.
@inproceedings{wu-etal-2026-reflective,
  title     = {Reflective {RAG}: Self-Evaluation Driven Strategy Optimization in Agentic Retrieval-Augmented Generation},
  author    = {Wu, Haiyan and
               Wang, Chenchen and
               Sun, Chaoqun and
               Lu, Chengxiong and
               Chen, Yan-Hong and
               Zhang, Zhiqiang and
               Feng, Xiaoqing},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.648/},
  pages     = {13252--13267},
  isbn      = {979-8-89176-395-1},
  abstract  = {Retrieval-Augmented Generation (RAG) has emerged as a widely adopted paradigm for grounding Large Language Models (LLMs) in external knowledge. Recent agentic RAG systems introduce multi-turn reasoning, but they often lack the capacity to evaluate the utility of retrieved information, leading to brittle reasoning and suboptimal decision-making. We propose Reflective RAG, an agentic framework that incorporates self-evaluation to dynamically optimize retrieval and generation strategy. At its core, Reflective RAG employs a reflection tagging mechanism that allows the model to critique the relevance of retrieved content, thereby explicitly guiding its subsequent policy. To ensure robust learning, we introduce a two-stage training procedure that partially decouples evaluation semantics from strategy optimization. First, during supervised fine-tuning (SFT), the model learns to generate accurate reflection signals by self-correcting labels based on internal uncertainty. Second, a reinforcement learning (RL) stage optimizes the agent{'}s strategy using these reflections, stabilized by dynamic KL regularization. Evaluations across five knowledge-intensive QA benchmarks demonstrate that Reflective RAG consistently outperforms strong agentic baselines. Further analysis demonstrates its improved training stability and stronger generalization to complex multi-hop reasoning tasks.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the paper with citekey wu-etal-2026-reflective. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wu-etal-2026-reflective">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>Reflective RAG: Self-Evaluation Driven Strategy Optimization in Agentic Retrieval-Augmented Generation</title>
    </titleInfo>
    <!-- Authors, one name element each, in the order listed. -->
    <name type="personal">
      <namePart type="given">Haiyan</namePart>
      <namePart type="family">Wu</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Chenchen</namePart>
      <namePart type="family">Wang</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Chaoqun</namePart>
      <namePart type="family">Sun</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Chengxiong</namePart>
      <namePart type="family">Lu</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Yan-Hong</namePart>
      <namePart type="family">Chen</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Zhiqiang</namePart>
      <namePart type="family">Zhang</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Xiaoqing</namePart>
      <namePart type="family">Feng</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>
    <abstract>Retrieval-Augmented Generation (RAG) has emerged as a widely adopted paradigm for grounding Large Language Models (LLMs) in external knowledge. Recent agentic RAG systems introduce multi-turn reasoning, but they often lack the capacity to evaluate the utility of retrieved information, leading to brittle reasoning and suboptimal decision-making. We propose Reflective RAG, an agentic framework that incorporates self-evaluation to dynamically optimize retrieval and generation strategy. At its core, Reflective RAG employs a reflection tagging mechanism that allows the model to critique the relevance of retrieved content, thereby explicitly guiding its subsequent policy. To ensure robust learning, we introduce a two-stage training procedure that partially decouples evaluation semantics from strategy optimization. First, during supervised fine-tuning (SFT), the model learns to generate accurate reflection signals by self-correcting labels based on internal uncertainty. Second, a reinforcement learning (RL) stage optimizes the agent’s strategy using these reflections, stabilized by dynamic KL regularization. Evaluations across five knowledge-intensive QA benchmarks demonstrate that Reflective RAG consistently outperforms strong agentic baselines. Further analysis demonstrates its improved training stability and stronger generalization to complex multi-hop reasoning tasks.</abstract>
    <identifier type="citekey">wu-etal-2026-reflective</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.648/</url>
    </location>
    <!-- Issue date and page range of the paper within the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>13252</start>
        <end>13267</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Reflective RAG: Self-Evaluation Driven Strategy Optimization in Agentic Retrieval-Augmented Generation
%A Wu, Haiyan
%A Wang, Chenchen
%A Sun, Chaoqun
%A Lu, Chengxiong
%A Chen, Yan-Hong
%A Zhang, Zhiqiang
%A Feng, Xiaoqing
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F wu-etal-2026-reflective
%X Retrieval-Augmented Generation (RAG) has emerged as a widely adopted paradigm for grounding Large Language Models (LLMs) in external knowledge. Recent agentic RAG systems introduce multi-turn reasoning, but they often lack the capacity to evaluate the utility of retrieved information, leading to brittle reasoning and suboptimal decision-making. We propose Reflective RAG, an agentic framework that incorporates self-evaluation to dynamically optimize retrieval and generation strategy. At its core, Reflective RAG employs a reflection tagging mechanism that allows the model to critique the relevance of retrieved content, thereby explicitly guiding its subsequent policy. To ensure robust learning, we introduce a two-stage training procedure that partially decouples evaluation semantics from strategy optimization. First, during supervised fine-tuning (SFT), the model learns to generate accurate reflection signals by self-correcting labels based on internal uncertainty. Second, a reinforcement learning (RL) stage optimizes the agent’s strategy using these reflections, stabilized by dynamic KL regularization. Evaluations across five knowledge-intensive QA benchmarks demonstrate that Reflective RAG consistently outperforms strong agentic baselines. Further analysis demonstrates its improved training stability and stronger generalization to complex multi-hop reasoning tasks.
%U https://aclanthology.org/2026.findings-acl.648/
%P 13252-13267
Markdown (Informal)
[Reflective RAG: Self-Evaluation Driven Strategy Optimization in Agentic Retrieval-Augmented Generation](https://aclanthology.org/2026.findings-acl.648/) (Wu et al., Findings 2026)
ACL
- Haiyan Wu, Chenchen Wang, Chaoqun Sun, Chengxiong Lu, Yan-Hong Chen, Zhiqiang Zhang, and Xiaoqing Feng. 2026. Reflective RAG: Self-Evaluation Driven Strategy Optimization in Agentic Retrieval-Augmented Generation. In Findings of the Association for Computational Linguistics: ACL 2026, pages 13252–13267, San Diego, California, United States. Association for Computational Linguistics.