% Conference-paper record (key: ramu-etal-2024-re2).
% The braces around RE$^2$ in the title keep styles that sentence-case titles
% from altering the system name; the superscript alone is set in math mode.
% NOTE(review): address carries the conference location, as in the source
% record, rather than a publisher city -- confirm this is what the target
% bibliography style expects.
@inproceedings{ramu-etal-2024-re2,
  title     = {{RE$^2$}: Region-Aware Relation Extraction from Visually Rich Documents},
  author    = {Ramu, Pritika and
               Wang, Sijia and
               Mouatadid, Lalla and
               Rimchala, Joy and
               Huang, Lifu},
  editor    = {Duh, Kevin and
               Gomez, Helena and
               Bethard, Steven},
  booktitle = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.naacl-long.484},
  doi       = {10.18653/v1/2024.naacl-long.484},
  pages     = {8731--8747},
  abstract  = {Current research in form understanding predominantly relies on large pre-trained language models, necessitating extensive data for pre-training. However, the importance of layout structure (i.e., the spatial relationship between the entity blocks in the visually rich document) to relation extraction has been overlooked. In this paper, we propose \textbf{RE}gion-Aware \textbf{R}elation \textbf{E}xtraction ($\mathbf{RE^2}$) that leverages region-level spatial structure among the entity blocks to improve their relation prediction. We design an edge-aware graph attention network to learn the interaction between entities while considering their spatial relationship defined by their region-level representations. We also introduce a constraint objective to regularize the model towards consistency with the inherent constraints of the relation extraction task. To support the research on relation extraction from visually rich documents and demonstrate the generalizability of $\mathbf{RE^2}$, we build a new benchmark dataset, \textit{DiverseForm}, that covers a wide range of domains. Extensive experiments on \textit{DiverseForm} and several public benchmark datasets demonstrate significant superiority and transferability of $\mathbf{RE^2}$ across various domains and languages, with up to 18.88\% absolute F-score gain over all high-performing baselines.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID "ramu-etal-2024-re2". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ramu-etal-2024-re2">
    <!-- Title of the paper itself (the host proceedings title is under relatedItem). -->
    <titleInfo>
      <title>RE²: Region-Aware Relation Extraction from Visually Rich Documents</title>
    </titleInfo>
    <!-- Authors, in the order listed in the record. -->
    <name type="personal">
      <namePart type="given">Pritika</namePart>
      <namePart type="family">Ramu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sijia</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lalla</namePart>
      <namePart type="family">Mouatadid</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Joy</namePart>
      <namePart type="family">Rimchala</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lifu</namePart>
      <namePart type="family">Huang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- Issue date of the paper, given to month precision. -->
    <originInfo>
      <dateIssued>2024-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Kevin</namePart>
        <namePart type="family">Duh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Helena</namePart>
        <namePart type="family">Gomez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Bethard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Mexico City, Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <!-- Plain-text abstract (no markup). -->
    <abstract>Current research in form understanding predominantly relies on large pre-trained language models, necessitating extensive data for pre-training. However, the importance of layout structure (i.e., the spatial relationship between the entity blocks in the visually rich document) to relation extraction has been overlooked. In this paper, we propose REgion-Aware Relation Extraction (RE²) that leverages region-level spatial structure among the entity blocks to improve their relation prediction. We design an edge-aware graph attention network to learn the interaction between entities while considering their spatial relationship defined by their region-level representations. We also introduce a constraint objective to regularize the model towards consistency with the inherent constraints of the relation extraction task. To support the research on relation extraction from visually rich documents and demonstrate the generalizability of RE², we build a new benchmark dataset, DiverseForm, that covers a wide range of domains. Extensive experiments on DiverseForm and several public benchmark datasets demonstrate significant superiority and transferability of RE² across various domains and languages, with up to 18.88% absolute F-score gain over all high-performing baselines.</abstract>
    <!-- Identifiers: citation key, DOI, and landing-page URL. -->
    <identifier type="citekey">ramu-etal-2024-re2</identifier>
    <identifier type="doi">10.18653/v1/2024.naacl-long.484</identifier>
    <location>
      <url>https://aclanthology.org/2024.naacl-long.484</url>
    </location>
    <!-- Position within the host volume: date and page extent. -->
    <part>
      <date>2024-06</date>
      <extent unit="page">
        <start>8731</start>
        <end>8747</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T RE²: Region-Aware Relation Extraction from Visually Rich Documents
%A Ramu, Pritika
%A Wang, Sijia
%A Mouatadid, Lalla
%A Rimchala, Joy
%A Huang, Lifu
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F ramu-etal-2024-re2
%X Current research in form understanding predominantly relies on large pre-trained language models, necessitating extensive data for pre-training. However, the importance of layout structure (i.e., the spatial relationship between the entity blocks in the visually rich document) to relation extraction has been overlooked. In this paper, we propose REgion-Aware Relation Extraction (RE²) that leverages region-level spatial structure among the entity blocks to improve their relation prediction. We design an edge-aware graph attention network to learn the interaction between entities while considering their spatial relationship defined by their region-level representations. We also introduce a constraint objective to regularize the model towards consistency with the inherent constraints of the relation extraction task. To support the research on relation extraction from visually rich documents and demonstrate the generalizability of RE², we build a new benchmark dataset, DiverseForm, that covers a wide range of domains. Extensive experiments on DiverseForm and several public benchmark datasets demonstrate significant superiority and transferability of RE² across various domains and languages, with up to 18.88% absolute F-score gain over all high-performing baselines.
%R 10.18653/v1/2024.naacl-long.484
%U https://aclanthology.org/2024.naacl-long.484
%U https://doi.org/10.18653/v1/2024.naacl-long.484
%P 8731-8747
Markdown (Informal)
[RE²: Region-Aware Relation Extraction from Visually Rich Documents](https://aclanthology.org/2024.naacl-long.484) (Ramu et al., NAACL 2024)
ACL
- Pritika Ramu, Sijia Wang, Lalla Mouatadid, Joy Rimchala, and Lifu Huang. 2024. RE²: Region-Aware Relation Extraction from Visually Rich Documents. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 8731–8747, Mexico City, Mexico. Association for Computational Linguistics.