@inproceedings{majumder-etal-2020-representation,
title = "Representation Learning for Information Extraction from Form-like Documents",
author = "Majumder, Bodhisattwa Prasad and
Potti, Navneet and
Tata, Sandeep and
Wendt, James Bradley and
Zhao, Qi and
Najork, Marc",
editor = "Jurafsky, Dan and
Chai, Joyce and
Schluter, Natalie and
Tetreault, Joel",
booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.acl-main.580",
doi = "10.18653/v1/2020.acl-main.580",
pages = "6495--6504",
abstract = "We propose a novel approach using representation learning for tackling the problem of extracting structured information from form-like document images. We propose an extraction system that uses knowledge of the types of the target fields to generate extraction candidates and a neural network architecture that learns a dense representation of each candidate based on neighboring words in the document. These learned representations are not only useful in solving the extraction task for unseen document templates from two different domains but are also interpretable, as we show using loss cases.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="majumder-etal-2020-representation">
<titleInfo>
<title>Representation Learning for Information Extraction from Form-like Documents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bodhisattwa</namePart>
<namePart type="given">Prasad</namePart>
<namePart type="family">Majumder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Navneet</namePart>
<namePart type="family">Potti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandeep</namePart>
<namePart type="family">Tata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="given">Bradley</namePart>
<namePart type="family">Wendt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qi</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marc</namePart>
<namePart type="family">Najork</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Jurafsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Chai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalie</namePart>
<namePart type="family">Schluter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Tetreault</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We propose a novel approach using representation learning for tackling the problem of extracting structured information from form-like document images. We propose an extraction system that uses knowledge of the types of the target fields to generate extraction candidates and a neural network architecture that learns a dense representation of each candidate based on neighboring words in the document. These learned representations are not only useful in solving the extraction task for unseen document templates from two different domains but are also interpretable, as we show using loss cases.</abstract>
<identifier type="citekey">majumder-etal-2020-representation</identifier>
<identifier type="doi">10.18653/v1/2020.acl-main.580</identifier>
<location>
<url>https://aclanthology.org/2020.acl-main.580</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>6495</start>
<end>6504</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Representation Learning for Information Extraction from Form-like Documents
%A Majumder, Bodhisattwa Prasad
%A Potti, Navneet
%A Tata, Sandeep
%A Wendt, James Bradley
%A Zhao, Qi
%A Najork, Marc
%Y Jurafsky, Dan
%Y Chai, Joyce
%Y Schluter, Natalie
%Y Tetreault, Joel
%S Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F majumder-etal-2020-representation
%X We propose a novel approach using representation learning for tackling the problem of extracting structured information from form-like document images. We propose an extraction system that uses knowledge of the types of the target fields to generate extraction candidates and a neural network architecture that learns a dense representation of each candidate based on neighboring words in the document. These learned representations are not only useful in solving the extraction task for unseen document templates from two different domains but are also interpretable, as we show using loss cases.
%R 10.18653/v1/2020.acl-main.580
%U https://aclanthology.org/2020.acl-main.580
%U https://doi.org/10.18653/v1/2020.acl-main.580
%P 6495-6504
Markdown (Informal)
[Representation Learning for Information Extraction from Form-like Documents](https://aclanthology.org/2020.acl-main.580) (Majumder et al., ACL 2020)
ACL