@inproceedings{silva-silva-2021-review-document,
title = "A Review on Document Information Extraction Approaches",
author = "Silva, Kanishka and
Silva, Thushari",
editor = "Djabri, Souhila and
Gimadi, Dinara and
Mihaylova, Tsvetomila and
Nikolova-Koleva, Ivelina",
booktitle = "Proceedings of the Student Research Workshop Associated with RANLP 2021",
month = sep,
year = "2021",
address = "Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-srw.24",
pages = "174--179",
abstract = "Information extraction from documents has become great use of novel natural language processing areas. Most of the entity extraction methodologies are variant in a context such as medical area, financial area, also come even limited to the given language. It is better to have one generic approach applicable for any document type to extract entity information regardless of language, context, and structure. Also, another issue in such research is structural analysis while keeping the hierarchical, semantic, and heuristic features. Another problem identified is that usually, it requires a massive training corpus. Therefore, this research focus on mitigating such barriers. Several approaches have been identifying towards building document information extractors focusing on different disciplines. This research area involves natural language processing, semantic analysis, information extraction, and conceptual modelling. This paper presents a review of the information extraction mechanism to construct a generic framework for document extraction with aim of providing a solid base for upcoming research.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="silva-silva-2021-review-document">
<titleInfo>
<title>A Review on Document Information Extraction Approaches</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kanishka</namePart>
<namePart type="family">Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thushari</namePart>
<namePart type="family">Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Student Research Workshop Associated with RANLP 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Souhila</namePart>
<namePart type="family">Djabri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dinara</namePart>
<namePart type="family">Gimadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tsvetomila</namePart>
<namePart type="family">Mihaylova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivelina</namePart>
<namePart type="family">Nikolova-Koleva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Information extraction from documents has become great use of novel natural language processing areas. Most of the entity extraction methodologies are variant in a context such as medical area, financial area, also come even limited to the given language. It is better to have one generic approach applicable for any document type to extract entity information regardless of language, context, and structure. Also, another issue in such research is structural analysis while keeping the hierarchical, semantic, and heuristic features. Another problem identified is that usually, it requires a massive training corpus. Therefore, this research focus on mitigating such barriers. Several approaches have been identifying towards building document information extractors focusing on different disciplines. This research area involves natural language processing, semantic analysis, information extraction, and conceptual modelling. This paper presents a review of the information extraction mechanism to construct a generic framework for document extraction with aim of providing a solid base for upcoming research.</abstract>
<identifier type="citekey">silva-silva-2021-review-document</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-srw.24</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>174</start>
<end>179</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Review on Document Information Extraction Approaches
%A Silva, Kanishka
%A Silva, Thushari
%Y Djabri, Souhila
%Y Gimadi, Dinara
%Y Mihaylova, Tsvetomila
%Y Nikolova-Koleva, Ivelina
%S Proceedings of the Student Research Workshop Associated with RANLP 2021
%D 2021
%8 September
%I INCOMA Ltd.
%C Online
%F silva-silva-2021-review-document
%X Information extraction from documents has become great use of novel natural language processing areas. Most of the entity extraction methodologies are variant in a context such as medical area, financial area, also come even limited to the given language. It is better to have one generic approach applicable for any document type to extract entity information regardless of language, context, and structure. Also, another issue in such research is structural analysis while keeping the hierarchical, semantic, and heuristic features. Another problem identified is that usually, it requires a massive training corpus. Therefore, this research focus on mitigating such barriers. Several approaches have been identifying towards building document information extractors focusing on different disciplines. This research area involves natural language processing, semantic analysis, information extraction, and conceptual modelling. This paper presents a review of the information extraction mechanism to construct a generic framework for document extraction with aim of providing a solid base for upcoming research.
%U https://aclanthology.org/2021.ranlp-srw.24
%P 174-179
Markdown (Informal)
[A Review on Document Information Extraction Approaches](https://aclanthology.org/2021.ranlp-srw.24) (Silva & Silva, RANLP 2021)
ACL