% ACL Anthology record (https://aclanthology.org/2026.acl-srw.41/), cite key sharma-etal-2026-test.
% Field values are brace-delimited; acronyms and proper nouns in title/booktitle
% are braced as whole words so sentence-casing styles keep their capitals.
% NOTE(review): the URL slug contains "acl-srw" while booktitle names the main
% ACL 2026 proceedings; confirm the volume title against the Anthology page.
@inproceedings{sharma-etal-2026-test,
  title     = {Test-Time Strategies for More Efficient and Accurate Agentic {RAG}},
  author    = {Sharma, Abhinav and
               Zhang, Brian and
               Guntur, Deepti and
               Zuo, Zhiyang and
               Chaudhari, Shreyas and
               Zhao, Wenlong and
               Dernoncourt, Franck and
               Mathur, Puneet and
               Rossi, Ryan A. and
               Lipka, Nedim},
  editor    = {T.Y.S.S., Santosh and
               Rodriguez, Juan Diego and
               de Gibert, Ona},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association for Computational Linguistics} ({ACL} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-srw.41/},
  pages     = {463--469},
  isbn      = {979-8-89176-393-7},
  abstract  = {Retrieval-Augmented Generation (RAG) systems face challenges with complex, multi-hop questions, and iterative agentic frameworks such as Search-R1 (Jin et al., 2025) have been proposed to address these complexities. However, such approaches can introduce inefficiencies, including repetitive retrieval of previously processed information and challenges in contextualizing retrieved results effectively within the current generation prompt. Such issues can lead to unnecessary retrieval turns, suboptimal reasoning, inaccurate answers, and increased token consumption. In this paper, we investigate test-time modifications to Search-R1's open-source Qwen2.5-7B pipeline to mitigate these identified shortcomings. Specifically, we explore the integration of two components and their combination: a contextualization module to better integrate relevant information from retrieved documents into reasoning, and a de-duplication module that replaces previously retrieved documents with the next most relevant ones. We evaluate our approaches using the HotpotQA (Yang et al., 2018) and the Natural Questions (Kwiatkowski et al., 2019) datasets, reporting the exact match (EM) score, an LLM-as-a-Judge assessment of answer correctness, and the average number of turns. Our best-performing variant (contextualization) achieves a 5.6\% increase in EM score and reduces the average number of turns by 10.5\% compared to the Search-R1 baseline. While contextualization itself introduces additional LLM calls, our results demonstrate improved answer accuracy and reduced retrieval load.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS v3 record; the ID attribute repeats the citekey identifier given near the end. -->
  <mods ID="sharma-etal-2026-test">
    <titleInfo>
      <title>Test-Time Strategies for More Efficient and Accurate Agentic RAG</title>
    </titleInfo>

    <!-- Authors: one personal name element per contributor, each carrying the "author" role. -->
    <name type="personal">
      <namePart type="given">Abhinav</namePart>
      <namePart type="family">Sharma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Brian</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Deepti</namePart>
      <namePart type="family">Guntur</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhiyang</namePart>
      <namePart type="family">Zuo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shreyas</namePart>
      <namePart type="family">Chaudhari</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wenlong</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Franck</namePart>
      <namePart type="family">Dernoncourt</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Puneet</namePart>
      <namePart type="family">Mathur</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <!-- Given name and middle initial are stored as two separate "given" parts. -->
      <namePart type="given">Ryan</namePart>
      <namePart type="given">A</namePart>
      <namePart type="family">Rossi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nedim</namePart>
      <namePart type="family">Lipka</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place, genre and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Santosh</namePart>
        <namePart type="family">T.Y.S.S.</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="given">Diego</namePart>
        <namePart type="family">Rodriguez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ona</namePart>
        <namePart type="family">de Gibert</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-393-7</identifier>
    </relatedItem>

    <abstract>Retrieval-Augmented Generation (RAG) systems face challenges with complex, multi-hop questions, and iterative agentic frameworks such as Search-R1 (Jin et al., 2025) have been proposed to address these complexities. However, such approaches can introduce inefficiencies, including repetitive retrieval of previously processed information and challenges in contextualizing retrieved results effectively within the current generation prompt. Such issues can lead to unnecessary retrieval turns, suboptimal reasoning, inaccurate answers, and increased token consumption. In this paper, we investigate test-time modifications to Search-R1’s open-source Qwen2.5-7B pipeline to mitigate these identified shortcomings. Specifically, we explore the integration of two components and their combination: a contextualization module to better integrate relevant information from retrieved documents into reasoning, and a de-duplication module that replaces previously retrieved documents with the next most relevant ones. We evaluate our approaches using the HotpotQA (Yang et al., 2018) and the Natural Questions (Kwiatkowski et al., 2019) datasets, reporting the exact match (EM) score, an LLM-as-a-Judge assessment of answer correctness, and the average number of turns. Our best-performing variant (contextualization) achieves a 5.6% increase in EM score and reduces the average number of turns by 10.5% compared to the Search-R1 baseline. While contextualization itself introduces additional LLM calls, our results demonstrate improved answer accuracy and reduced retrieval load.</abstract>

    <!-- Identifiers and locators for this paper: cite key, landing URL, issue date and page span. -->
    <identifier type="citekey">sharma-etal-2026-test</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-srw.41/</url>
    </location>
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>463</start>
        <end>469</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Test-Time Strategies for More Efficient and Accurate Agentic RAG
%A Sharma, Abhinav
%A Zhang, Brian
%A Guntur, Deepti
%A Zuo, Zhiyang
%A Chaudhari, Shreyas
%A Zhao, Wenlong
%A Dernoncourt, Franck
%A Mathur, Puneet
%A Rossi, Ryan A.
%A Lipka, Nedim
%Y T.Y.S.S., Santosh
%Y Rodriguez, Juan Diego
%Y de Gibert, Ona
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-393-7
%F sharma-etal-2026-test
%X Retrieval-Augmented Generation (RAG) systems face challenges with complex, multi-hop questions, and iterative agentic frameworks such as Search-R1 (Jin et al., 2025) have been proposed to address these complexities. However, such approaches can introduce inefficiencies, including repetitive retrieval of previously processed information and challenges in contextualizing retrieved results effectively within the current generation prompt. Such issues can lead to unnecessary retrieval turns, suboptimal reasoning, inaccurate answers, and increased token consumption. In this paper, we investigate test-time modifications to Search-R1’s open-source Qwen2.5-7B pipeline to mitigate these identified shortcomings. Specifically, we explore the integration of two components and their combination: a contextualization module to better integrate relevant information from retrieved documents into reasoning, and a de-duplication module that replaces previously retrieved documents with the next most relevant ones. We evaluate our approaches using the HotpotQA (Yang et al., 2018) and the Natural Questions (Kwiatkowski et al., 2019) datasets, reporting the exact match (EM) score, an LLM-as-a-Judge assessment of answer correctness, and the average number of turns. Our best-performing variant (contextualization) achieves a 5.6% increase in EM score and reduces the average number of turns by 10.5% compared to the Search-R1 baseline. While contextualization itself introduces additional LLM calls, our results demonstrate improved answer accuracy and reduced retrieval load.
%U https://aclanthology.org/2026.acl-srw.41/
%P 463-469
Markdown (Informal)
[Test-Time Strategies for More Efficient and Accurate Agentic RAG](https://aclanthology.org/2026.acl-srw.41/) (Sharma et al., ACL 2026)
ACL
- Abhinav Sharma, Brian Zhang, Deepti Guntur, Zhiyang Zuo, Shreyas Chaudhari, Wenlong Zhao, Franck Dernoncourt, Puneet Mathur, Ryan A. Rossi, and Nedim Lipka. 2026. Test-Time Strategies for More Efficient and Accurate Agentic RAG. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026), pages 463–469, San Diego, California, United States. Association for Computational Linguistics.