@inproceedings{yang-etal-2025-superrag,
title = "{S}uper{RAG}: Beyond {RAG} with Layout-Aware Graph Modeling",
author = "Yang, Chening and
Vu, Duy-Khanh and
Nguyen, Minh-Tien and
Nguyen, Xuan-Quang and
Nguyen, Linh and
Le, Hung",
editor = "Chen, Weizhu and
Yang, Yi and
Kachuee, Mohammad and
Fu, Xue-Yong",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.naacl-industry.45/",
doi = "10.18653/v1/2025.naacl-industry.45",
pages = "544--557",
ISBN = "979-8-89176-194-0",
abstract = "This paper introduces layout-aware graph modeling for multimodal RAG. Different from traditional RAG methods that only deal with flat text chunks, the proposed method takes into account the relationship of multimodalities by using a graph structure. To do that, a graph modeling structure is defined based on document layout parsing. The structure of an input document is retained with the connection of text chunks, tables, and figures. This representation allows the method to handle complex questions that require information from multimodalities. To confirm the efficiency of the graph modeling, a flexible RAG pipeline is developed using robust components. Experimental results on four benchmark test sets confirm the contribution of the layout-aware modeling for performance improvement of the RAG pipeline."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yang-etal-2025-superrag">
<titleInfo>
<title>SuperRAG: Beyond RAG with Layout-Aware Graph Modeling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chening</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Duy-Khanh</namePart>
<namePart type="family">Vu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minh-Tien</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuan-Quang</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Linh</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hung</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Weizhu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="family">Kachuee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xue-Yong</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-194-0</identifier>
</relatedItem>
<abstract>This paper introduces layout-aware graph modeling for multimodal RAG. Different from traditional RAG methods that only deal with flat text chunks, the proposed method takes into account the relationship of multimodalities by using a graph structure. To do that, a graph modeling structure is defined based on document layout parsing. The structure of an input document is retained with the connection of text chunks, tables, and figures. This representation allows the method to handle complex questions that require information from multimodalities. To confirm the efficiency of the graph modeling, a flexible RAG pipeline is developed using robust components. Experimental results on four benchmark test sets confirm the contribution of the layout-aware modeling for performance improvement of the RAG pipeline.</abstract>
<identifier type="citekey">yang-etal-2025-superrag</identifier>
<identifier type="doi">10.18653/v1/2025.naacl-industry.45</identifier>
<location>
<url>https://aclanthology.org/2025.naacl-industry.45/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>544</start>
<end>557</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SuperRAG: Beyond RAG with Layout-Aware Graph Modeling
%A Yang, Chening
%A Vu, Duy-Khanh
%A Nguyen, Minh-Tien
%A Nguyen, Xuan-Quang
%A Nguyen, Linh
%A Le, Hung
%Y Chen, Weizhu
%Y Yang, Yi
%Y Kachuee, Mohammad
%Y Fu, Xue-Yong
%S Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-194-0
%F yang-etal-2025-superrag
%X This paper introduces layout-aware graph modeling for multimodal RAG. Different from traditional RAG methods that only deal with flat text chunks, the proposed method takes into account the relationship of multimodalities by using a graph structure. To do that, a graph modeling structure is defined based on document layout parsing. The structure of an input document is retained with the connection of text chunks, tables, and figures. This representation allows the method to handle complex questions that require information from multimodalities. To confirm the efficiency of the graph modeling, a flexible RAG pipeline is developed using robust components. Experimental results on four benchmark test sets confirm the contribution of the layout-aware modeling for performance improvement of the RAG pipeline.
%R 10.18653/v1/2025.naacl-industry.45
%U https://aclanthology.org/2025.naacl-industry.45/
%U https://doi.org/10.18653/v1/2025.naacl-industry.45
%P 544-557
Markdown (Informal)
[SuperRAG: Beyond RAG with Layout-Aware Graph Modeling](https://aclanthology.org/2025.naacl-industry.45/) (Yang et al., NAACL 2025)
ACL
- Chening Yang, Duy-Khanh Vu, Minh-Tien Nguyen, Xuan-Quang Nguyen, Linh Nguyen, and Hung Le. 2025. SuperRAG: Beyond RAG with Layout-Aware Graph Modeling. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track), pages 544–557, Albuquerque, New Mexico. Association for Computational Linguistics.