% ACL Anthology record 2026.acl-long.1705 (ACL 2026, Volume 1: Long Papers).
% NOTE(review): address looks like the conference location (Anthology
% convention) rather than the publisher's city; confirm before relying on a
% style that prints it as the publisher location.
@inproceedings{zhao-etal-2026-rethinking,
  title     = {Rethinking Reasoning-Intensive Retrieval: Evaluating and Advancing Retrievers in Agentic Search Systems},
  author    = {Zhao, Yilun and
               Wei, Jinbiao and
               Song, Tingyu and
               Zhang, Siyue and
               Zhao, Chen and
               Cohan, Arman},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-long.1705/},
  pages     = {36776--36806},
  isbn      = {979-8-89176-390-6},
  abstract  = {Reasoning-intensive retrieval aims to surface evidence that maximizes downstream reasoning utility rather than only topical similarity. This capability is increasingly vital for agentic retriever-in-the-loop systems such as Deep-Research. However, existing retriever evaluation benchmarks, exemplified by Bright, provide narrow gold sets and evaluate retrievers in isolation, which obscures their value inside realistic agent workflows. We introduce Bright-Pro, an evaluation framework that assesses the effectiveness of retrievers in agentic search systems. Bright-Pro covers a broad range of queries across diverse professional domains. For each query, we provide expert-annotated reasoning aspects, positive documents, a reference response, and evaluation rubrics, enabling fine-grained assessment of retriever performance. Beyond static evaluation, we further assess retrievers in the context of agentic search systems, measuring their practical utility when serving as core components within agentic workflows. Using Bright-Pro, we evaluate classical lexical, general-purpose, and reasoning-intensive retrievers, providing actionable insights for future retriever development.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 export of the record with citekey zhao-etal-2026-rethinking. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zhao-etal-2026-rethinking">
    <titleInfo>
      <title>Rethinking Reasoning-Intensive Retrieval: Evaluating and Advancing Retrievers in Agentic Search Systems</title>
    </titleInfo>
    <!-- Authors, one name element each, in listed order. -->
    <name type="personal">
      <namePart type="given">Yilun</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jinbiao</namePart>
      <namePart type="family">Wei</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tingyu</namePart>
      <namePart type="family">Song</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Siyue</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chen</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Arman</namePart>
      <namePart type="family">Cohan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-390-6</identifier>
    </relatedItem>
    <abstract>Reasoning-intensive retrieval aims to surface evidence that maximizes downstream reasoning utility rather than only topical similarity. This capability is increasingly vital for agentic retriever-in-the-loop systems such as Deep-Research. However, existing retriever evaluation benchmarks, exemplified by Bright, provide narrow gold sets and evaluate retrievers in isolation, which obscures their value inside realistic agent workflows. We introduce Bright-Pro, an evaluation framework that assesses the effectiveness of retrievers in agentic search systems. Bright-Pro covers a broad range of queries across diverse professional domains. For each query, we provide expert-annotated reasoning aspects, positive documents, a reference response, and evaluation rubrics, enabling fine-grained assessment of retriever performance. Beyond static evaluation, we further assess retrievers in the context of agentic search systems, measuring their practical utility when serving as core components within agentic workflows. Using Bright-Pro, we evaluate classical lexical, general-purpose, and reasoning-intensive retrievers, providing actionable insights for future retriever development.</abstract>
    <identifier type="citekey">zhao-etal-2026-rethinking</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-long.1705/</url>
    </location>
    <!-- Issue date and page extent of this paper. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>36776</start>
        <end>36806</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Rethinking Reasoning-Intensive Retrieval: Evaluating and Advancing Retrievers in Agentic Search Systems
%A Zhao, Yilun
%A Wei, Jinbiao
%A Song, Tingyu
%A Zhang, Siyue
%A Zhao, Chen
%A Cohan, Arman
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F zhao-etal-2026-rethinking
%X Reasoning-intensive retrieval aims to surface evidence that maximizes downstream reasoning utility rather than only topical similarity. This capability is increasingly vital for agentic retriever-in-the-loop systems such as Deep-Research. However, existing retriever evaluation benchmarks, exemplified by Bright, provide narrow gold sets and evaluate retrievers in isolation, which obscures their value inside realistic agent workflows. We introduce Bright-Pro, an evaluation framework that assesses the effectiveness of retrievers in agentic search systems. Bright-Pro covers a broad range of queries across diverse professional domains. For each query, we provide expert-annotated reasoning aspects, positive documents, a reference response, and evaluation rubrics, enabling fine-grained assessment of retriever performance. Beyond static evaluation, we further assess retrievers in the context of agentic search systems, measuring their practical utility when serving as core components within agentic workflows. Using Bright-Pro, we evaluate classical lexical, general-purpose, and reasoning-intensive retrievers, providing actionable insights for future retriever development.
%U https://aclanthology.org/2026.acl-long.1705/
%P 36776-36806
Markdown (Informal)
[Rethinking Reasoning-Intensive Retrieval: Evaluating and Advancing Retrievers in Agentic Search Systems](https://aclanthology.org/2026.acl-long.1705/) (Zhao et al., ACL 2026)
ACL