% Overview paper for the shared task on Devanagari-script NLU (language
% identification, hate speech detection, hate speech target classification),
% published in the CHiPSAL 2025 workshop proceedings.
% Editor "Bal, Bal Krishna" is written family-name-first so that it parses to
% the same person as the identically named entry in the author list.
% NOTE(review): `address` appears to hold the workshop venue rather than the
% publisher's city -- confirm before reusing it as a publisher location.
@inproceedings{thapa-etal-2025-natural,
  title     = {Natural Language Understanding of {Devanagari} Script Languages: Language Identification, Hate Speech and its Target Detection},
  author    = {Thapa, Surendrabikram and
               Rauniyar, Kritesh and
               Jafri, Farhan Ahmad and
               Adhikari, Surabhi and
               Sarveswaran, Kengatharaiyer and
               Bal, Bal Krishna and
               Veeramani, Hariram and
               Naseem, Usman},
  editor    = {Sarveswaran, Kengatharaiyer and
               Vaidya, Ashwini and
               Bal, Bal Krishna and
               Shams, Sana and
               Thapa, Surendrabikram},
  booktitle = {Proceedings of the First Workshop on Challenges in Processing South Asian Languages ({CHiPSAL} 2025)},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {International Committee on Computational Linguistics},
  url       = {https://aclanthology.org/2025.chipsal-1.7/},
  pages     = {71--82},
  abstract  = {The growing use of Devanagari-script languages such as Hindi, Nepali, Marathi, Sanskrit, and Bhojpuri on social media presents unique challenges for natural language understanding (NLU), particularly in language identification, hate speech detection, and target classification. To address these challenges, we organized a shared task with three subtasks: (i) identifying the language of Devanagari-script text, (ii) detecting hate speech, and (iii) classifying hate speech targets into individual, community, or organization. A curated dataset combining multiple corpora was provided, with splits for training, evaluation, and testing. The task attracted 113 participants, with 32 teams submitting models evaluated on accuracy, precision, recall, and macro F1-score. Participants applied innovative methods, including large language models, transformer models, and multilingual embeddings, to tackle the linguistic complexities of Devanagari-script languages. This paper summarizes the shared task, datasets, and results, and aims to contribute to advancing NLU for low-resource languages and fostering inclusive, culturally aware natural language processing (NLP) solutions.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="thapa-etal-2025-natural">
<titleInfo>
<title>Natural Language Understanding of Devanagari Script Languages: Language Identification, Hate Speech and its Target Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Surendrabikram</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kritesh</namePart>
<namePart type="family">Rauniyar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Farhan</namePart>
<namePart type="given">Ahmad</namePart>
<namePart type="family">Jafri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surabhi</namePart>
<namePart type="family">Adhikari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kengatharaiyer</namePart>
<namePart type="family">Sarveswaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bal</namePart>
<namePart type="given">Krishna</namePart>
<namePart type="family">Bal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hariram</namePart>
<namePart type="family">Veeramani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Usman</namePart>
<namePart type="family">Naseem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kengatharaiyer</namePart>
<namePart type="family">Sarveswaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashwini</namePart>
<namePart type="family">Vaidya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bal</namePart>
<namePart type="given">Krishna</namePart>
<namePart type="family">Bal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sana</namePart>
<namePart type="family">Shams</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surendrabikram</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The growing use of Devanagari-script languages such as Hindi, Nepali, Marathi, Sanskrit, and Bhojpuri on social media presents unique challenges for natural language understanding (NLU), particularly in language identification, hate speech detection, and target classification. To address these challenges, we organized a shared task with three subtasks: (i) identifying the language of Devanagari-script text, (ii) detecting hate speech, and (iii) classifying hate speech targets into individual, community, or organization. A curated dataset combining multiple corpora was provided, with splits for training, evaluation, and testing. The task attracted 113 participants, with 32 teams submitting models evaluated on accuracy, precision, recall, and macro F1-score. Participants applied innovative methods, including large language models, transformer models, and multilingual embeddings, to tackle the linguistic complexities of Devanagari-script languages. This paper summarizes the shared task, datasets, and results, and aims to contribute to advancing NLU for low-resource languages and fostering inclusive, culturally aware natural language processing (NLP) solutions.</abstract>
<identifier type="citekey">thapa-etal-2025-natural</identifier>
<location>
<url>https://aclanthology.org/2025.chipsal-1.7/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>71</start>
<end>82</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Natural Language Understanding of Devanagari Script Languages: Language Identification, Hate Speech and its Target Detection
%A Thapa, Surendrabikram
%A Rauniyar, Kritesh
%A Jafri, Farhan Ahmad
%A Adhikari, Surabhi
%A Sarveswaran, Kengatharaiyer
%A Bal, Bal Krishna
%A Veeramani, Hariram
%A Naseem, Usman
%Y Sarveswaran, Kengatharaiyer
%Y Vaidya, Ashwini
%Y Bal, Bal Krishna
%Y Shams, Sana
%Y Thapa, Surendrabikram
%S Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)
%D 2025
%8 January
%I International Committee on Computational Linguistics
%C Abu Dhabi, UAE
%F thapa-etal-2025-natural
%X The growing use of Devanagari-script languages such as Hindi, Nepali, Marathi, Sanskrit, and Bhojpuri on social media presents unique challenges for natural language understanding (NLU), particularly in language identification, hate speech detection, and target classification. To address these challenges, we organized a shared task with three subtasks: (i) identifying the language of Devanagari-script text, (ii) detecting hate speech, and (iii) classifying hate speech targets into individual, community, or organization. A curated dataset combining multiple corpora was provided, with splits for training, evaluation, and testing. The task attracted 113 participants, with 32 teams submitting models evaluated on accuracy, precision, recall, and macro F1-score. Participants applied innovative methods, including large language models, transformer models, and multilingual embeddings, to tackle the linguistic complexities of Devanagari-script languages. This paper summarizes the shared task, datasets, and results, and aims to contribute to advancing NLU for low-resource languages and fostering inclusive, culturally aware natural language processing (NLP) solutions.
%U https://aclanthology.org/2025.chipsal-1.7/
%P 71-82
Markdown (Informal)
[Natural Language Understanding of Devanagari Script Languages: Language Identification, Hate Speech and its Target Detection](https://aclanthology.org/2025.chipsal-1.7/) (Thapa et al., CHiPSAL 2025)
ACL
- Surendrabikram Thapa, Kritesh Rauniyar, Farhan Ahmad Jafri, Surabhi Adhikari, Kengatharaiyer Sarveswaran, Bal Krishna Bal, Hariram Veeramani, and Usman Naseem. 2025. Natural Language Understanding of Devanagari Script Languages: Language Identification, Hate Speech and its Target Detection. In Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025), pages 71–82, Abu Dhabi, UAE. International Committee on Computational Linguistics.