@inproceedings{srivastava-2025-dweshvaani,
title = "{D}wesh{V}aani: An {LLM} for Detecting Religious Hate Speech in Code-Mixed {H}indi-{E}nglish",
author = "Srivastava, Varad",
editor = "Sarveswaran, Kengatharaiyer and
Vaidya, Ashwini and
Krishna Bal, Bal and
Shams, Sana and
Thapa, Surendrabikram",
booktitle = "Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2025.chipsal-1.5/",
pages = "46--60",
abstract = "Traditional language models in NLP have been extensively made use of, in hateful speech detection problems. With the growth of social media, content in regional languages has grown exponentially. However, use of language models as well as LLMs on code-mixed Hindi-English hateful speech detection is under-explored. Our work addresses this gap by investigating both cutting-edge LLMs by Meta, Google, OpenAI, Nvidia as well as Indic-LLMs like Sarvam, Indic-Gemma, and Airavata on hateful speech detection in code-mixed Hindi-English languages in a comprehensive set of few-shot scenarios which include examples selected randomly, as well as with retrieval-augmented generation (RAG) based on MuRIL language model. We observed that Indic-LLMs which are instruction tuned on Indian content fall behind on the task. We also experimented with fine-tuning approaches, where we use knowledge-distillation based-finetuning by using extracted information about rationale behind hate speech, as part of the fine-tuning process. Finally, we also propose Dwesh-Vaani, an LLM based on fine-tuned Gemma-2, that out-performs all other approaches at the task of religious hateful speech detection as well as targeted religion identification in code-mixed Hindi-English languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="srivastava-2025-dweshvaani">
<titleInfo>
<title>DweshVaani: An LLM for Detecting Religious Hate Speech in Code-Mixed Hindi-English</title>
</titleInfo>
<name type="personal">
<namePart type="given">Varad</namePart>
<namePart type="family">Srivastava</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kengatharaiyer</namePart>
<namePart type="family">Sarveswaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashwini</namePart>
<namePart type="family">Vaidya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bal</namePart>
<namePart type="family">Krishna Bal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sana</namePart>
<namePart type="family">Shams</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surendrabikram</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Traditional language models in NLP have been extensively made use of, in hateful speech detection problems. With the growth of social media, content in regional languages has grown exponentially. However, use of language models as well as LLMs on code-mixed Hindi-English hateful speech detection is under-explored. Our work addresses this gap by investigating both cutting-edge LLMs by Meta, Google, OpenAI, Nvidia as well as Indic-LLMs like Sarvam, Indic-Gemma, and Airavata on hateful speech detection in code-mixed Hindi-English languages in a comprehensive set of few-shot scenarios which include examples selected randomly, as well as with retrieval-augmented generation (RAG) based on MuRIL language model. We observed that Indic-LLMs which are instruction tuned on Indian content fall behind on the task. We also experimented with fine-tuning approaches, where we use knowledge-distillation based-finetuning by using extracted information about rationale behind hate speech, as part of the fine-tuning process. Finally, we also propose Dwesh-Vaani, an LLM based on fine-tuned Gemma-2, that out-performs all other approaches at the task of religious hateful speech detection as well as targeted religion identification in code-mixed Hindi-English languages.</abstract>
<identifier type="citekey">srivastava-2025-dweshvaani</identifier>
<location>
<url>https://aclanthology.org/2025.chipsal-1.5/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>46</start>
<end>60</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DweshVaani: An LLM for Detecting Religious Hate Speech in Code-Mixed Hindi-English
%A Srivastava, Varad
%Y Sarveswaran, Kengatharaiyer
%Y Vaidya, Ashwini
%Y Krishna Bal, Bal
%Y Shams, Sana
%Y Thapa, Surendrabikram
%S Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)
%D 2025
%8 January
%I International Committee on Computational Linguistics
%C Abu Dhabi, UAE
%F srivastava-2025-dweshvaani
%X Traditional language models in NLP have been extensively made use of, in hateful speech detection problems. With the growth of social media, content in regional languages has grown exponentially. However, use of language models as well as LLMs on code-mixed Hindi-English hateful speech detection is under-explored. Our work addresses this gap by investigating both cutting-edge LLMs by Meta, Google, OpenAI, Nvidia as well as Indic-LLMs like Sarvam, Indic-Gemma, and Airavata on hateful speech detection in code-mixed Hindi-English languages in a comprehensive set of few-shot scenarios which include examples selected randomly, as well as with retrieval-augmented generation (RAG) based on MuRIL language model. We observed that Indic-LLMs which are instruction tuned on Indian content fall behind on the task. We also experimented with fine-tuning approaches, where we use knowledge-distillation based-finetuning by using extracted information about rationale behind hate speech, as part of the fine-tuning process. Finally, we also propose Dwesh-Vaani, an LLM based on fine-tuned Gemma-2, that out-performs all other approaches at the task of religious hateful speech detection as well as targeted religion identification in code-mixed Hindi-English languages.
%U https://aclanthology.org/2025.chipsal-1.5/
%P 46-60
Markdown (Informal)
[DweshVaani: An LLM for Detecting Religious Hate Speech in Code-Mixed Hindi-English](https://aclanthology.org/2025.chipsal-1.5/) (Srivastava, CHiPSAL 2025)
ACL