@inproceedings{shakya-etal-2025-skpd,
title = "{SKPD} Emergency @ {NLU} of {D}evanagari Script Languages 2025: {D}evanagari Script Classification using {CBOW} Embeddings with Attention-Enhanced {B}i{LSTM}",
author = "Shakya, Shubham and
Sainju, Saral and
Shrestha, Subham Krishna and
Dawadi, Prekshya and
Khatiwada, Shreya",
editor = "Sarveswaran, Kengatharaiyer and
Vaidya, Ashwini and
Krishna Bal, Bal and
Shams, Sana and
Thapa, Surendrabikram",
booktitle = "Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2025.chipsal-1.40/",
pages = "339--343",
abstract = "Devanagari script, encompassing languages such as Nepali, Marathi, Sanskrit, Bhojpuri and Hindi, involves challenges for identification due to its overlapping character sets and lexical characteristics. To address this, we propose a method that utilizes Continuous Bag of Words (CBOW) embeddings integrated with attention-enhanced Bidirectional Long Short-Term Memory (BiLSTM) network. Our methodology involves meticulous data preprocessing and generation of word embeddings to better the model`s ability. The proposed method achieves an overall accuracy of 99{\%}, significantly outperforming character level identification approaches. The results reveal high precision across most language pairs, though minor classification confusions persist between closely related languages. Our findings demonstrate the robustness of the CBOW-BiLSTM model for Devanagari script classification and highlights the importance of accurate language identification in preserving linguistic diversity in multilingual environments. Keywords: Language Identification, Devanagari Script, Natural Language Processing, Neural Networks"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shakya-etal-2025-skpd">
<titleInfo>
<title>SKPD Emergency @ NLU of Devanagari Script Languages 2025: Devanagari Script Classification using CBOW Embeddings with Attention-Enhanced BiLSTM</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shubham</namePart>
<namePart type="family">Shakya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saral</namePart>
<namePart type="family">Sainju</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Subham</namePart>
<namePart type="given">Krishna</namePart>
<namePart type="family">Shrestha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prekshya</namePart>
<namePart type="family">Dawadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shreya</namePart>
<namePart type="family">Khatiwada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kengatharaiyer</namePart>
<namePart type="family">Sarveswaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashwini</namePart>
<namePart type="family">Vaidya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bal</namePart>
<namePart type="family">Krishna Bal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sana</namePart>
<namePart type="family">Shams</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surendrabikram</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Devanagari script, encompassing languages such as Nepali, Marathi, Sanskrit, Bhojpuri and Hindi, involves challenges for identification due to its overlapping character sets and lexical characteristics. To address this, we propose a method that utilizes Continuous Bag of Words (CBOW) embeddings integrated with attention-enhanced Bidirectional Long Short-Term Memory (BiLSTM) network. Our methodology involves meticulous data preprocessing and generation of word embeddings to better the model‘s ability. The proposed method achieves an overall accuracy of 99%, significantly outperforming character level identification approaches. The results reveal high precision across most language pairs, though minor classification confusions persist between closely related languages. Our findings demonstrate the robustness of the CBOW-BiLSTM model for Devanagari script classification and highlights the importance of accurate language identification in preserving linguistic diversity in multilingual environments. Keywords: Language Identification, Devanagari Script, Natural Language Processing, Neural Networks</abstract>
<identifier type="citekey">shakya-etal-2025-skpd</identifier>
<location>
<url>https://aclanthology.org/2025.chipsal-1.40/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>339</start>
<end>343</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SKPD Emergency @ NLU of Devanagari Script Languages 2025: Devanagari Script Classification using CBOW Embeddings with Attention-Enhanced BiLSTM
%A Shakya, Shubham
%A Sainju, Saral
%A Shrestha, Subham Krishna
%A Dawadi, Prekshya
%A Khatiwada, Shreya
%Y Sarveswaran, Kengatharaiyer
%Y Vaidya, Ashwini
%Y Krishna Bal, Bal
%Y Shams, Sana
%Y Thapa, Surendrabikram
%S Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)
%D 2025
%8 January
%I International Committee on Computational Linguistics
%C Abu Dhabi, UAE
%F shakya-etal-2025-skpd
%X Devanagari script, encompassing languages such as Nepali, Marathi, Sanskrit, Bhojpuri and Hindi, involves challenges for identification due to its overlapping character sets and lexical characteristics. To address this, we propose a method that utilizes Continuous Bag of Words (CBOW) embeddings integrated with attention-enhanced Bidirectional Long Short-Term Memory (BiLSTM) network. Our methodology involves meticulous data preprocessing and generation of word embeddings to better the model‘s ability. The proposed method achieves an overall accuracy of 99%, significantly outperforming character level identification approaches. The results reveal high precision across most language pairs, though minor classification confusions persist between closely related languages. Our findings demonstrate the robustness of the CBOW-BiLSTM model for Devanagari script classification and highlights the importance of accurate language identification in preserving linguistic diversity in multilingual environments. Keywords: Language Identification, Devanagari Script, Natural Language Processing, Neural Networks
%U https://aclanthology.org/2025.chipsal-1.40/
%P 339-343
Markdown (Informal)
[SKPD Emergency @ NLU of Devanagari Script Languages 2025: Devanagari Script Classification using CBOW Embeddings with Attention-Enhanced BiLSTM](https://aclanthology.org/2025.chipsal-1.40/) (Shakya et al., CHiPSAL 2025)
ACL