@inproceedings{luo-etal-2020-merge,
title = "Merge and Recognize: A Geometry and 2{D} Context Aware Graph Model for Named Entity Recognition from Visual Documents",
author = "Luo, Chuwei and
Wang, Yongpan and
Zheng, Qi and
Li, Liangchen and
Gao, Feiyu and
Zhang, Shiyu",
editor = "Ustalov, Dmitry and
Somasundaran, Swapna and
Panchenko, Alexander and
Malliaros, Fragkiskos D. and
Hulpuș, Ioana and
Jansen, Peter and
Jana, Abhik",
booktitle = "Proceedings of the Graph-based Methods for Natural Language Processing (TextGraphs)",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.textgraphs-1.3/",
doi = "10.18653/v1/2020.textgraphs-1.3",
pages = "24--34",
abstract = "Named entity recognition (NER) from visual documents, such as invoices, receipts or business cards, is a critical task for visual document understanding. Most classical approaches use a sequence-based model (typically BiLSTM-CRF framework) without considering document structure. Recent work on graph-based model using graph convolutional networks to encode visual and textual features have achieved promising performance on the task. However, few attempts take geometry information of text segments (text in bounding box) in visual documents into account. Meanwhile, existing methods do not consider that related text segments which need to be merged to form a complete entity in many real-world situations. In this paper, we present GraphNEMR, a graph-based model that uses graph convolutional networks to jointly merge text segments and recognize named entities. By incorporating geometry information from visual documents into our model, richer 2D context information is generated to improve document representations. To merge text segments, we introduce a novel mechanism that captures both geometry information as well as semantic information based on pre-trained language model. Experimental results show that the proposed GraphNEMR model outperforms both sequence-based and graph-based SOTA methods significantly."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="luo-etal-2020-merge">
<titleInfo>
<title>Merge and Recognize: A Geometry and 2D Context Aware Graph Model for Named Entity Recognition from Visual Documents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chuwei</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yongpan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qi</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liangchen</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Feiyu</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiyu</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Graph-based Methods for Natural Language Processing (TextGraphs)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dmitry</namePart>
<namePart type="family">Ustalov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Swapna</namePart>
<namePart type="family">Somasundaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Panchenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fragkiskos</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Malliaros</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ioana</namePart>
<namePart type="family">Hulpuș</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Jansen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhik</namePart>
<namePart type="family">Jana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Named entity recognition (NER) from visual documents, such as invoices, receipts or business cards, is a critical task for visual document understanding. Most classical approaches use a sequence-based model (typically BiLSTM-CRF framework) without considering document structure. Recent work on graph-based model using graph convolutional networks to encode visual and textual features have achieved promising performance on the task. However, few attempts take geometry information of text segments (text in bounding box) in visual documents into account. Meanwhile, existing methods do not consider that related text segments which need to be merged to form a complete entity in many real-world situations. In this paper, we present GraphNEMR, a graph-based model that uses graph convolutional networks to jointly merge text segments and recognize named entities. By incorporating geometry information from visual documents into our model, richer 2D context information is generated to improve document representations. To merge text segments, we introduce a novel mechanism that captures both geometry information as well as semantic information based on pre-trained language model. Experimental results show that the proposed GraphNEMR model outperforms both sequence-based and graph-based SOTA methods significantly.</abstract>
<identifier type="citekey">luo-etal-2020-merge</identifier>
<identifier type="doi">10.18653/v1/2020.textgraphs-1.3</identifier>
<location>
<url>https://aclanthology.org/2020.textgraphs-1.3/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>24</start>
<end>34</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Merge and Recognize: A Geometry and 2D Context Aware Graph Model for Named Entity Recognition from Visual Documents
%A Luo, Chuwei
%A Wang, Yongpan
%A Zheng, Qi
%A Li, Liangchen
%A Gao, Feiyu
%A Zhang, Shiyu
%Y Ustalov, Dmitry
%Y Somasundaran, Swapna
%Y Panchenko, Alexander
%Y Malliaros, Fragkiskos D.
%Y Hulpuș, Ioana
%Y Jansen, Peter
%Y Jana, Abhik
%S Proceedings of the Graph-based Methods for Natural Language Processing (TextGraphs)
%D 2020
%8 December
%I Association for Computational Linguistics
%C Barcelona, Spain (Online)
%F luo-etal-2020-merge
%X Named entity recognition (NER) from visual documents, such as invoices, receipts or business cards, is a critical task for visual document understanding. Most classical approaches use a sequence-based model (typically BiLSTM-CRF framework) without considering document structure. Recent work on graph-based model using graph convolutional networks to encode visual and textual features have achieved promising performance on the task. However, few attempts take geometry information of text segments (text in bounding box) in visual documents into account. Meanwhile, existing methods do not consider that related text segments which need to be merged to form a complete entity in many real-world situations. In this paper, we present GraphNEMR, a graph-based model that uses graph convolutional networks to jointly merge text segments and recognize named entities. By incorporating geometry information from visual documents into our model, richer 2D context information is generated to improve document representations. To merge text segments, we introduce a novel mechanism that captures both geometry information as well as semantic information based on pre-trained language model. Experimental results show that the proposed GraphNEMR model outperforms both sequence-based and graph-based SOTA methods significantly.
%R 10.18653/v1/2020.textgraphs-1.3
%U https://aclanthology.org/2020.textgraphs-1.3/
%U https://doi.org/10.18653/v1/2020.textgraphs-1.3
%P 24-34
Markdown (Informal)
[Merge and Recognize: A Geometry and 2D Context Aware Graph Model for Named Entity Recognition from Visual Documents](https://aclanthology.org/2020.textgraphs-1.3/) (Luo et al., TextGraphs 2020)
ACL