% ACL Anthology record: IntrAgent paper, ACL 2026 (Volume 1: Long Papers), pages 674--715.
% Note: `address` holds the conference venue, as exported by the ACL Anthology.
@inproceedings{ma-etal-2026-intragent,
  title     = {{IntrAgent}: An {LLM} Agent for Content-Grounded Information Retrieval through Literature Review},
  author    = {Ma, Fengbo and
               Rao, Zixin and
               Li, Xiaoting and
               Chen, Zhetao and
               Sun, Hongyue and
               Zhao, Yiping and
               Chen, Xianyan and
               Xiang, Zhen},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-long.29/},
  pages     = {674--715},
  isbn      = {979-8-89176-390-6},
  abstract  = {Scientific research relies on accurate information retrieval from literature to support analytical decisions. In this work, we introduce a new task, *INformation reTRieval through literAture reVIEW* (IntraView), which aims to automate fine-grained information retrieval *faithfully* grounded in the provided content in response to research-driven queries, and propose IntrAgent, an LLM-based agent that addresses this challenging task. In particular, IntrAgent is designed to mimic human behaviors when reading literature for information retrieval - identifying relevant sections and then iteratively extracting key details to refine the retrieved information. It follows a two-stage pipeline: a *Section Ranking* stage that prioritizes relevant literature sections through structural-knowledge-enabled reasoning, and an *Iterative Reading* stage that continuously extracts details and synthesizes them into concise, contextually grounded answers. To support rigorous evaluation, we introduce IntraBench, a new benchmark consisting of 315 test instances built from expert-authored questions paired with literature spanning *five* STEM domains. Across seven backbone LLMs, IntrAgent achieves on average 13.2{\%} higher cross-domain accuracy than state-of-the-art RAG and research-agent baselines.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the IntrAgent paper (citekey: ma-etal-2026-intragent). -->
  <mods ID="ma-etal-2026-intragent">
    <titleInfo>
      <title>IntrAgent: An LLM Agent for Content-Grounded Information Retrieval through Literature Review</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Fengbo</namePart>
      <namePart type="family">Ma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zixin</namePart>
      <namePart type="family">Rao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiaoting</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhetao</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hongyue</namePart>
      <namePart type="family">Sun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yiping</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xianyan</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhen</namePart>
      <namePart type="family">Xiang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-390-6</identifier>
    </relatedItem>
    <abstract>Scientific research relies on accurate information retrieval from literature to support analytical decisions. In this work, we introduce a new task, *INformation reTRieval through literAture reVIEW* (IntraView), which aims to automate fine-grained information retrieval *faithfully* grounded in the provided content in response to research-driven queries, and propose IntrAgent, an LLM-based agent that addresses this challenging task. In particular, IntrAgent is designed to mimic human behaviors when reading literature for information retrieval - identifying relevant sections and then iteratively extracting key details to refine the retrieved information. It follows a two-stage pipeline: a *Section Ranking* stage that prioritizes relevant literature sections through structural-knowledge-enabled reasoning, and an *Iterative Reading* stage that continuously extracts details and synthesizes them into concise, contextually grounded answers. To support rigorous evaluation, we introduce IntraBench, a new benchmark consisting of 315 test instances built from expert-authored questions paired with literature spanning *five* STEM domains. Across seven backbone LLMs, IntrAgent achieves on average 13.2% higher cross-domain accuracy than state-of-the-art RAG and research-agent baselines.</abstract>
    <identifier type="citekey">ma-etal-2026-intragent</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-long.29/</url>
    </location>
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>674</start>
        <end>715</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T IntrAgent: An LLM Agent for Content-Grounded Information Retrieval through Literature Review
%A Ma, Fengbo
%A Rao, Zixin
%A Li, Xiaoting
%A Chen, Zhetao
%A Sun, Hongyue
%A Zhao, Yiping
%A Chen, Xianyan
%A Xiang, Zhen
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F ma-etal-2026-intragent
%X Scientific research relies on accurate information retrieval from literature to support analytical decisions. In this work, we introduce a new task, *INformation reTRieval through literAture reVIEW* (IntraView), which aims to automate fine-grained information retrieval *faithfully* grounded in the provided content in response to research-driven queries, and propose IntrAgent, an LLM-based agent that addresses this challenging task. In particular, IntrAgent is designed to mimic human behaviors when reading literature for information retrieval - identifying relevant sections and then iteratively extracting key details to refine the retrieved information. It follows a two-stage pipeline: a *Section Ranking* stage that prioritizes relevant literature sections through structural-knowledge-enabled reasoning, and an *Iterative Reading* stage that continuously extracts details and synthesizes them into concise, contextually grounded answers. To support rigorous evaluation, we introduce IntraBench, a new benchmark consisting of 315 test instances built from expert-authored questions paired with literature spanning *five* STEM domains. Across seven backbone LLMs, IntrAgent achieves on average 13.2% higher cross-domain accuracy than state-of-the-art RAG and research-agent baselines.
%U https://aclanthology.org/2026.acl-long.29/
%P 674-715
Markdown (Informal)
[IntrAgent: An LLM Agent for Content-Grounded Information Retrieval through Literature Review](https://aclanthology.org/2026.acl-long.29/) (Ma et al., ACL 2026)
ACL
- Fengbo Ma, Zixin Rao, Xiaoting Li, Zhetao Chen, Hongyue Sun, Yiping Zhao, Xianyan Chen, and Zhen Xiang. 2026. IntrAgent: An LLM Agent for Content-Grounded Information Retrieval through Literature Review. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 674–715, San Diego, California, United States. Association for Computational Linguistics.