@inproceedings{neupane-etal-2025-structured,
title = "Structured Information Extraction from {N}epali Scanned Documents using Layout Transformer and {LLM}s",
author = "Neupane, Aayush and
Lamichhane, Aayush and
Paudel, Ankit and
Shakya, Aman",
editor = "Sarveswaran, Kengatharaiyer and
Vaidya, Ashwini and
Krishna Bal, Bal and
Shams, Sana and
Thapa, Surendrabikram",
booktitle = "Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2025.chipsal-1.13/",
pages = "134--143",
abstract = "Despite growing global interest in information extraction from scanned documents, there is still a significant research gap concerning Nepali documents. This study seeks to address this gap by focusing on methods for extracting information from texts with Nepali typeface or Devanagari characters. The primary focus is on the performance of the Language Independent Layout Transformer (LiLT), which was employed as a token classifier to extract information from Nepali texts. LiLT achieved F1 score of approximately 0.87. Complementing this approach, large language models (LLMs), including OpenAI`s proprietary GPT-4o and the open-source Llama 3.1 8B, were also evaluated. The GPT-4o model exhibited promising performance, with an accuracy of around 55-80{\%} accuracy for a complete match, accuracy varying among different fields. Llama 3.1 8B model achieved only 20-40{\%} accuracy. For 90{\%} match both GPT-4o and Llama 3.1 8B had higher accuracy by varying amounts for different fields. Llama 3.1 8B performed particularly poorly compared to the LiLT model. These results aim to provide a foundation for future work in the domain of digitization of Nepali documents."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="neupane-etal-2025-structured">
<titleInfo>
<title>Structured Information Extraction from Nepali Scanned Documents using Layout Transformer and LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aayush</namePart>
<namePart type="family">Neupane</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aayush</namePart>
<namePart type="family">Lamichhane</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ankit</namePart>
<namePart type="family">Paudel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aman</namePart>
<namePart type="family">Shakya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kengatharaiyer</namePart>
<namePart type="family">Sarveswaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashwini</namePart>
<namePart type="family">Vaidya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bal</namePart>
<namePart type="family">Krishna Bal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sana</namePart>
<namePart type="family">Shams</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surendrabikram</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite growing global interest in information extraction from scanned documents, there is still a significant research gap concerning Nepali documents. This study seeks to address this gap by focusing on methods for extracting information from texts with Nepali typeface or Devanagari characters. The primary focus is on the performance of the Language Independent Layout Transformer (LiLT), which was employed as a token classifier to extract information from Nepali texts. LiLT achieved F1 score of approximately 0.87. Complementing this approach, large language models (LLMs), including OpenAI‘s proprietary GPT-4o and the open-source Llama 3.1 8B, were also evaluated. The GPT-4o model exhibited promising performance, with an accuracy of around 55-80% accuracy for a complete match, accuracy varying among different fields. Llama 3.1 8B model achieved only 20-40% accuracy. For 90% match both GPT-4o and Llama 3.1 8B had higher accuracy by varying amounts for different fields. Llama 3.1 8B performed particularly poorly compared to the LiLT model. These results aim to provide a foundation for future work in the domain of digitization of Nepali documents.</abstract>
<identifier type="citekey">neupane-etal-2025-structured</identifier>
<location>
<url>https://aclanthology.org/2025.chipsal-1.13/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>134</start>
<end>143</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Structured Information Extraction from Nepali Scanned Documents using Layout Transformer and LLMs
%A Neupane, Aayush
%A Lamichhane, Aayush
%A Paudel, Ankit
%A Shakya, Aman
%Y Sarveswaran, Kengatharaiyer
%Y Vaidya, Ashwini
%Y Krishna Bal, Bal
%Y Shams, Sana
%Y Thapa, Surendrabikram
%S Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)
%D 2025
%8 January
%I International Committee on Computational Linguistics
%C Abu Dhabi, UAE
%F neupane-etal-2025-structured
%X Despite growing global interest in information extraction from scanned documents, there is still a significant research gap concerning Nepali documents. This study seeks to address this gap by focusing on methods for extracting information from texts with Nepali typeface or Devanagari characters. The primary focus is on the performance of the Language Independent Layout Transformer (LiLT), which was employed as a token classifier to extract information from Nepali texts. LiLT achieved F1 score of approximately 0.87. Complementing this approach, large language models (LLMs), including OpenAI‘s proprietary GPT-4o and the open-source Llama 3.1 8B, were also evaluated. The GPT-4o model exhibited promising performance, with an accuracy of around 55-80% accuracy for a complete match, accuracy varying among different fields. Llama 3.1 8B model achieved only 20-40% accuracy. For 90% match both GPT-4o and Llama 3.1 8B had higher accuracy by varying amounts for different fields. Llama 3.1 8B performed particularly poorly compared to the LiLT model. These results aim to provide a foundation for future work in the domain of digitization of Nepali documents.
%U https://aclanthology.org/2025.chipsal-1.13/
%P 134-143
Markdown (Informal)
[Structured Information Extraction from Nepali Scanned Documents using Layout Transformer and LLMs](https://aclanthology.org/2025.chipsal-1.13/) (Neupane et al., CHiPSAL 2025)
ACL