@inproceedings{tao-etal-2025-treerag,
title = "{T}ree{RAG}: Unleashing the Power of Hierarchical Storage for Enhanced Knowledge Retrieval in Long Documents",
author = "Tao, Wenyu and
Xing, Xiaofen and
Chen, Yirong and
Huang, Linyi and
Xu, Xiangmin",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.20/",
doi = "10.18653/v1/2025.findings-acl.20",
pages = "356--371",
ISBN = "979-8-89176-256-5",
abstract = "When confronting long document information retrieval for Query-Focused Summarization(QFS), Traditional Retrieval-Augmented Generation(RAG) frameworks struggle to retrieve all relevant knowledge points, and the chunking and retrieve strategies of existing frameworks may disrupt the connections between knowledge points and the integrity of the information. To address these issues, we propose $\textbf{TreeRAG}$, which employs $\textbf{Tree-Chunking}$ for chunking and embedding in a tree-like structure , coupled with ``$\textbf{root-to-leaves}$'' and ``$\textbf{leaf-to-root}$'' retrieve strategy named $\textbf{Bidirectional Traversal Retrieval}$. This approach effectively preserves the hierarchical structure among knowledge points and significantly enhances the ability to retrieve while minimizing noise inference. Our experimental results on the $\textbf{Finance, Law, and Medical subsets of the Dragonball dataset}$ demonstrate that $\textbf{TreeRAG}$ achieves significant enhancements in both recall quality and precision compared to traditional and popular existing methods and achieves better performance to corresponding question-answering tasks, marking a new breakthrough in long document knowledge retrieval."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tao-etal-2025-treerag">
<titleInfo>
<title>TreeRAG: Unleashing the Power of Hierarchical Storage for Enhanced Knowledge Retrieval in Long Documents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wenyu</namePart>
<namePart type="family">Tao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaofen</namePart>
<namePart type="family">Xing</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yirong</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Linyi</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiangmin</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>When confronting long-document information retrieval for Query-Focused Summarization (QFS), traditional Retrieval-Augmented Generation (RAG) frameworks struggle to retrieve all relevant knowledge points, and the chunking and retrieval strategies of existing frameworks may disrupt the connections between knowledge points and the integrity of the information. To address these issues, we propose TreeRAG, which employs Tree-Chunking for chunking and embedding in a tree-like structure, coupled with a “root-to-leaves” and “leaf-to-root” retrieval strategy named Bidirectional Traversal Retrieval. This approach effectively preserves the hierarchical structure among knowledge points and significantly enhances retrieval ability while minimizing noise interference. Our experimental results on the Finance, Law, and Medical subsets of the Dragonball dataset demonstrate that TreeRAG achieves significant improvements in both recall quality and precision compared to traditional and popular existing methods, and achieves better performance on the corresponding question-answering tasks, marking a new breakthrough in long-document knowledge retrieval.</abstract>
<identifier type="citekey">tao-etal-2025-treerag</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.20</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.20/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>356</start>
<end>371</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TreeRAG: Unleashing the Power of Hierarchical Storage for Enhanced Knowledge Retrieval in Long Documents
%A Tao, Wenyu
%A Xing, Xiaofen
%A Chen, Yirong
%A Huang, Linyi
%A Xu, Xiangmin
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F tao-etal-2025-treerag
%X When confronting long-document information retrieval for Query-Focused Summarization (QFS), traditional Retrieval-Augmented Generation (RAG) frameworks struggle to retrieve all relevant knowledge points, and the chunking and retrieval strategies of existing frameworks may disrupt the connections between knowledge points and the integrity of the information. To address these issues, we propose TreeRAG, which employs Tree-Chunking for chunking and embedding in a tree-like structure, coupled with a “root-to-leaves” and “leaf-to-root” retrieval strategy named Bidirectional Traversal Retrieval. This approach effectively preserves the hierarchical structure among knowledge points and significantly enhances retrieval ability while minimizing noise interference. Our experimental results on the Finance, Law, and Medical subsets of the Dragonball dataset demonstrate that TreeRAG achieves significant improvements in both recall quality and precision compared to traditional and popular existing methods, and achieves better performance on the corresponding question-answering tasks, marking a new breakthrough in long-document knowledge retrieval.
%R 10.18653/v1/2025.findings-acl.20
%U https://aclanthology.org/2025.findings-acl.20/
%U https://doi.org/10.18653/v1/2025.findings-acl.20
%P 356-371
Markdown (Informal)
[TreeRAG: Unleashing the Power of Hierarchical Storage for Enhanced Knowledge Retrieval in Long Documents](https://aclanthology.org/2025.findings-acl.20/) (Tao et al., Findings 2025)
ACL
Wenyu Tao, Xiaofen Xing, Yirong Chen, Linyi Huang, and Xiangmin Xu. 2025. [TreeRAG: Unleashing the Power of Hierarchical Storage for Enhanced Knowledge Retrieval in Long Documents](https://aclanthology.org/2025.findings-acl.20/). In *Findings of the Association for Computational Linguistics: ACL 2025*, pages 356–371, Vienna, Austria. Association for Computational Linguistics.
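
For readers of this record, the abstract describes two mechanisms: Tree-Chunking, which chunks and embeds a document along its section hierarchy, and Bidirectional Traversal Retrieval, which searches root-to-leaves and then walks leaf-to-root so ancestor context is preserved. The sketch below is a minimal, self-contained Python illustration of that general idea only, not the authors' implementation; the toy bag-of-words embedding and every name in it (`TreeNode`, `tree_chunk`, `bidirectional_retrieve`, `top_k`) are assumptions of this sketch.

```python
# Illustrative sketch only -- not the paper's code. It mimics the two ideas the
# abstract describes: (1) chunking/embedding a document as a tree that follows
# its section hierarchy, and (2) a bidirectional ("root-to-leaves" then
# "leaf-to-root") traversal at retrieval time. All names are hypothetical.
from __future__ import annotations
from collections import Counter
from dataclasses import dataclass, field
from math import sqrt


@dataclass
class TreeNode:
    text: str                                  # heading or chunk text at this node
    children: list["TreeNode"] = field(default_factory=list)
    parent: "TreeNode | None" = None


def embed(text: str) -> Counter:
    """Toy bag-of-words 'embedding'; a real system would use a sentence encoder."""
    return Counter(text.lower().split())


def cosine(a: Counter, b: Counter) -> float:
    dot = sum(a[t] * b[t] for t in a)
    na = sqrt(sum(v * v for v in a.values()))
    nb = sqrt(sum(v * v for v in b.values()))
    return dot / (na * nb) if na and nb else 0.0


def tree_chunk(section: dict, parent: TreeNode | None = None) -> TreeNode:
    """Build a chunk tree from a nested {'text': ..., 'subsections': [...]} outline."""
    node = TreeNode(text=section["text"], parent=parent)
    for sub in section.get("subsections", []):
        node.children.append(tree_chunk(sub, parent=node))
    return node


def bidirectional_retrieve(root: TreeNode, query: str, top_k: int = 1) -> list[str]:
    """Root-to-leaves: descend only into the most query-similar branches.
    Leaf-to-root: walk back up from each hit so ancestor headings are kept."""
    q = embed(query)
    results: list[str] = []
    frontier = [root]
    while frontier:
        node = frontier.pop()
        ranked = sorted(node.children, key=lambda c: cosine(q, embed(c.text)), reverse=True)
        for child in ranked[:top_k]:
            if not child.children:               # reached a leaf chunk: collect it
                lineage = []
                cur: TreeNode | None = child      # leaf-to-root walk
                while cur is not None:
                    lineage.append(cur.text)
                    cur = cur.parent
                results.append(" > ".join(reversed(lineage)))
            else:
                frontier.append(child)            # keep descending this branch
    return results


if __name__ == "__main__":
    doc = {
        "text": "Loan Agreement",
        "subsections": [
            {"text": "Interest", "subsections": [{"text": "The annual interest rate is 5%."}]},
            {"text": "Termination", "subsections": [{"text": "Either party may terminate with 30 days notice."}]},
        ],
    }
    root = tree_chunk(doc)
    print(bidirectional_retrieve(root, "what is the interest rate"))
```

In this toy run only the "Interest" branch is followed, and the retrieved leaf is returned together with its chain of ancestor headings; that leaf-to-root step is one plausible reading of how the hierarchical context the abstract emphasizes could be preserved.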