% Shared-task system paper from the CHiPSAL 2025 workshop proceedings (ACL Anthology export).
% Title words that must keep their capitals under sentence-casing styles are braced as
% whole words rather than by single letter, so kerning and hyphenation are not disturbed.
% NOTE(review): the editor entry "Krishna Bal, Bal" may have its given/family parts
% inverted (possibly "Bal, Bal Krishna"); confirm against the proceedings front matter.
@inproceedings{khadka-etal-2025-nepali,
    title = "{Nepali} Transformers@{NLU} of {Devanagari} Script Languages 2025: Detection of Language, Hate Speech and Targets",
    author = "Khadka, Pilot and
      Bk, Ankit and
      Acharya, Ashish and
      K.c., Bikram and
      Shrestha, Sandesh and
      Thapa, Rabin",
    editor = "Sarveswaran, Kengatharaiyer and
      Vaidya, Ashwini and
      Krishna Bal, Bal and
      Shams, Sana and
      Thapa, Surendrabikram",
    booktitle = "Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)",
    month = jan,
    year = "2025",
    address = "Abu Dhabi, UAE",
    publisher = "International Committee on Computational Linguistics",
    url = "https://aclanthology.org/2025.chipsal-1.36/",
    pages = "314--319",
    abstract = "The Devanagari script, an Indic script used by a diverse range of South Asian languages, presents a significant challenge in Natural Language Processing (NLP) research. The dialect and language variation, complex script features, and limited language-specific tools make development difficult. This shared task aims to address this challenge by bringing together researchers and practitioners to solve three key problems: Language identification, Hate speech detection, and Targets of Hate speech identification. The selected languages- Hindi, Nepali, Marathi, Sanskrit, and Bhojpuri- are widely used in South Asia and represent distinct linguistic structures. In this work, we explore the effectiveness of both machine-learning models and transformer-based models on all three sub-tasks. Our results demonstrate strong performance of the multilingual transformer model, particularly one pre-trained on domain-specific social media data, across all three tasks. The multilingual RoBERTa model, trained on the Twitter dataset, achieved a remarkable accuracy and F1-score of 99.5{\%} on language identification (Task A), 88.3{\%} and 72.5{\%} on Hate Speech detection (Task B), and 68.6{\%} and 61.8{\%} on Hate Speech Target Classification (Task C)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="khadka-etal-2025-nepali">
<titleInfo>
<title>Nepali Transformers@NLU of Devanagari Script Languages 2025: Detection of Language, Hate Speech and Targets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pilot</namePart>
<namePart type="family">Khadka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ankit</namePart>
<namePart type="family">Bk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashish</namePart>
<namePart type="family">Acharya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bikram</namePart>
<namePart type="family">K.c.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandesh</namePart>
<namePart type="family">Shrestha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rabin</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kengatharaiyer</namePart>
<namePart type="family">Sarveswaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashwini</namePart>
<namePart type="family">Vaidya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<!-- NOTE(review): the given/family split for this editor (given "Bal", family "Krishna Bal") may be inverted; confirm against the proceedings front matter before changing it. -->
<namePart type="given">Bal</namePart>
<namePart type="family">Krishna Bal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sana</namePart>
<namePart type="family">Shams</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surendrabikram</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Devanagari script, an Indic script used by a diverse range of South Asian languages, presents a significant challenge in Natural Language Processing (NLP) research. The dialect and language variation, complex script features, and limited language-specific tools make development difficult. This shared task aims to address this challenge by bringing together researchers and practitioners to solve three key problems: Language identification, Hate speech detection, and Targets of Hate speech identification. The selected languages- Hindi, Nepali, Marathi, Sanskrit, and Bhojpuri- are widely used in South Asia and represent distinct linguistic structures. In this work, we explore the effectiveness of both machine-learning models and transformer-based models on all three sub-tasks. Our results demonstrate strong performance of the multilingual transformer model, particularly one pre-trained on domain-specific social media data, across all three tasks. The multilingual RoBERTa model, trained on the Twitter dataset, achieved a remarkable accuracy and F1-score of 99.5% on language identification (Task A), 88.3% and 72.5% on Hate Speech detection (Task B), and 68.6% and 61.8% on Hate Speech Target Classification (Task C).</abstract>
<identifier type="citekey">khadka-etal-2025-nepali</identifier>
<location>
<url>https://aclanthology.org/2025.chipsal-1.36/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>314</start>
<end>319</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Nepali Transformers@NLU of Devanagari Script Languages 2025: Detection of Language, Hate Speech and Targets
%A Khadka, Pilot
%A Bk, Ankit
%A Acharya, Ashish
%A K.c., Bikram
%A Shrestha, Sandesh
%A Thapa, Rabin
%Y Sarveswaran, Kengatharaiyer
%Y Vaidya, Ashwini
%Y Krishna Bal, Bal
%Y Shams, Sana
%Y Thapa, Surendrabikram
%S Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)
%D 2025
%8 January
%I International Committee on Computational Linguistics
%C Abu Dhabi, UAE
%F khadka-etal-2025-nepali
%X The Devanagari script, an Indic script used by a diverse range of South Asian languages, presents a significant challenge in Natural Language Processing (NLP) research. The dialect and language variation, complex script features, and limited language-specific tools make development difficult. This shared task aims to address this challenge by bringing together researchers and practitioners to solve three key problems: Language identification, Hate speech detection, and Targets of Hate speech identification. The selected languages- Hindi, Nepali, Marathi, Sanskrit, and Bhojpuri- are widely used in South Asia and represent distinct linguistic structures. In this work, we explore the effectiveness of both machine-learning models and transformer-based models on all three sub-tasks. Our results demonstrate strong performance of the multilingual transformer model, particularly one pre-trained on domain-specific social media data, across all three tasks. The multilingual RoBERTa model, trained on the Twitter dataset, achieved a remarkable accuracy and F1-score of 99.5% on language identification (Task A), 88.3% and 72.5% on Hate Speech detection (Task B), and 68.6% and 61.8% on Hate Speech Target Classification (Task C).
%U https://aclanthology.org/2025.chipsal-1.36/
%P 314-319
Markdown (Informal)
[Nepali Transformers@NLU of Devanagari Script Languages 2025: Detection of Language, Hate Speech and Targets](https://aclanthology.org/2025.chipsal-1.36/) (Khadka et al., CHiPSAL 2025)
ACL