@inproceedings{wang-etal-2026-rethinking,
title = "Rethinking Reading Order: Toward Generalizable Document Understanding with {LLM}-based Relation Modeling",
author = "Wang, Weishi and
Hu, Hengchang and
Dahlmeier, Daniel",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-long.192/",
pages = "4110--4130",
ISBN = "979-8-89176-380-7",
abstract = "Document understanding requires modeling both structural and semantic relationships between the layout elements within the document, with human-perceived reading order (RO) playing a crucial yet often neglected role compared to heuristic OCR sequences used by most existing models. Previous approaches depend on costly, inconsistent human annotations, limiting scalability and generalization. To bridge the gap, we propose a cost-effective paradigm that leverages large language models (LLMs) to infer global RO and inter-element layout relations without human supervision. By explicitly incorporating RO as structural guidance, our method captures hierarchical, document-level dependencies beyond local adjacency. Experiments on Semantic Entity Recognition, Entity Linking, and Document Question Answering show consistent improvements over baseline methods. Notably, LLM-inferred RO, even when differing from ground-truth adjacency, provides richer global structural priors and yields superior downstream performance. These results and findings demonstrate the scalability and significance of RO-aware modeling, advancing both LLMs and lightweight layout-aware models for robust document understanding. Code, data, and more details will be made publicly available after corporate review, in accordance with SAP{'}s corporate open-source policy."
}