@comment{Conference paper record (ACL Anthology URL below). Case-sensitive words in the titles are protected with whole-word braces.}
@inproceedings{prakash-etal-2026-dpr,
  title     = {{DPR}@{DravidianLangTech} 2026: Transformer-Based Abusive Content Detection for {Tamil} Text Targeting Women on Social Media},
  author    = {Prakash, Diya and
               S, Praveen Kumar and
               Kumar, R Ranjith and
               Palani, Balasubramanian and
               Jose, Jobin and
               Rajamanickam, Siranjeevi},
  editor    = {Chakravarthi, Bharathi Raja and
               Priyadharshini, Ruba and
               Madasamy, Anand Kumar and
               Thavareesan, Sajeetha and
               Rajiakodi, Saranya and
               Navaneethakrishnan, Subalalitha and
               Chinnappa, Dhivya and
               Palani, Balasubramanian and
               Subramanian, Malliga and
               Shanmugavadivel, Kogilavani and
               Rajalakshmi, Ratnavel},
  booktitle = {Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for {Dravidian} Languages},
  month     = jul,
  year      = {2026},
  address   = {Underline (Virtual)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.dravidianlangtech-1.35/},
  pages     = {242--247},
  isbn      = {979-8-89176-401-9},
  abstract  = {The fast-growing number of content in Tamil in social media has led to increasing abusive and gender-directed hate speech in online platforms. Detecting abusive content written in Tamil is relatively difficult owing to the complex morphological structure of Tamil language, its dialects, transliteration, and contextualized usage. In this study, the use of transformer-based pretrained language models in detecting abusive content in Tamil was explored. Five transformer-based models{---}mBERT, MuRIL, XLM-RoBERTa, IndicBERT, and Tamil-BERT{---}were fine-tuned and tested using DravidianLangTech 2026 shared task dataset. The experimental results show that the best-performing model was Tamil-BERT with an accuracy rate of 80.72{\%} owing to Tamil-specific pretraining and better morphological analysis capabilities. Our system ranks 5th at the leaderboard of the DravidianLangTech 2026 shared task challenge. The source code and fine-tuned models are opensource and publicly accessible.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record (citekey prakash-etal-2026-dpr): six authors, then the host proceedings with eleven editors, then abstract, URL and page extent. -->
  <mods ID="prakash-etal-2026-dpr">
    <titleInfo>
      <title>DPR@DravidianLangTech 2026: Transformer-Based Abusive Content Detection for Tamil Text Targeting Women on Social Media</title>
    </titleInfo>
    <!-- Authors, in byline order -->
    <name type="personal">
      <namePart type="given">Diya</namePart>
      <namePart type="family">Prakash</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Praveen</namePart>
      <namePart type="given">Kumar</namePart>
      <namePart type="family">S</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">R</namePart>
      <namePart type="given">Ranjith</namePart>
      <namePart type="family">Kumar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Balasubramanian</namePart>
      <namePart type="family">Palani</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jobin</namePart>
      <namePart type="family">Jose</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Siranjeevi</namePart>
      <namePart type="family">Rajamanickam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume and its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Bharathi</namePart>
        <namePart type="given">Raja</namePart>
        <namePart type="family">Chakravarthi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruba</namePart>
        <namePart type="family">Priyadharshini</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anand</namePart>
        <namePart type="given">Kumar</namePart>
        <namePart type="family">Madasamy</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sajeetha</namePart>
        <namePart type="family">Thavareesan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Saranya</namePart>
        <namePart type="family">Rajiakodi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Subalalitha</namePart>
        <namePart type="family">Navaneethakrishnan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dhivya</namePart>
        <namePart type="family">Chinnappa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Balasubramanian</namePart>
        <namePart type="family">Palani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Malliga</namePart>
        <namePart type="family">Subramanian</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kogilavani</namePart>
        <namePart type="family">Shanmugavadivel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ratnavel</namePart>
        <namePart type="family">Rajalakshmi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Underline (Virtual)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-401-9</identifier>
    </relatedItem>
    <abstract>The fast-growing number of content in Tamil in social media has led to increasing abusive and gender-directed hate speech in online platforms. Detecting abusive content written in Tamil is relatively difficult owing to the complex morphological structure of Tamil language, its dialects, transliteration, and contextualized usage. In this study, the use of transformer-based pretrained language models in detecting abusive content in Tamil was explored. Five transformer-based models—mBERT, MuRIL, XLM-RoBERTa, IndicBERT, and Tamil-BERT—were fine-tuned and tested using DravidianLangTech 2026 shared task dataset. The experimental results show that the best-performing model was Tamil-BERT with an accuracy rate of 80.72% owing to Tamil-specific pretraining and better morphological analysis capabilities. Our system ranks 5th at the leaderboard of the DravidianLangTech 2026 shared task challenge. The source code and fine-tuned models are opensource and publicly accessible.</abstract>
    <identifier type="citekey">prakash-etal-2026-dpr</identifier>
    <location>
      <url>https://aclanthology.org/2026.dravidianlangtech-1.35/</url>
    </location>
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>242</start>
        <end>247</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T DPR@DravidianLangTech 2026: Transformer-Based Abusive Content Detection for Tamil Text Targeting Women on Social Media
%A Prakash, Diya
%A S, Praveen Kumar
%A Kumar, R. Ranjith
%A Palani, Balasubramanian
%A Jose, Jobin
%A Rajamanickam, Siranjeevi
%Y Chakravarthi, Bharathi Raja
%Y Priyadharshini, Ruba
%Y Madasamy, Anand Kumar
%Y Thavareesan, Sajeetha
%Y Rajiakodi, Saranya
%Y Navaneethakrishnan, Subalalitha
%Y Chinnappa, Dhivya
%Y Palani, Balasubramanian
%Y Subramanian, Malliga
%Y Shanmugavadivel, Kogilavani
%Y Rajalakshmi, Ratnavel
%S Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages
%D 2026
%8 July
%I Association for Computational Linguistics
%C Underline (Virtual)
%@ 979-8-89176-401-9
%F prakash-etal-2026-dpr
%X The fast-growing number of content in Tamil in social media has led to increasing abusive and gender-directed hate speech in online platforms. Detecting abusive content written in Tamil is relatively difficult owing to the complex morphological structure of Tamil language, its dialects, transliteration, and contextualized usage. In this study, the use of transformer-based pretrained language models in detecting abusive content in Tamil was explored. Five transformer-based models—mBERT, MuRIL, XLM-RoBERTa, IndicBERT, and Tamil-BERT—were fine-tuned and tested using DravidianLangTech 2026 shared task dataset. The experimental results show that the best-performing model was Tamil-BERT with an accuracy rate of 80.72% owing to Tamil-specific pretraining and better morphological analysis capabilities. Our system ranks 5th at the leaderboard of the DravidianLangTech 2026 shared task challenge. The source code and fine-tuned models are opensource and publicly accessible.
%U https://aclanthology.org/2026.dravidianlangtech-1.35/
%P 242-247
Markdown (Informal)
[DPR@DravidianLangTech 2026: Transformer-Based Abusive Content Detection for Tamil Text Targeting Women on Social Media](https://aclanthology.org/2026.dravidianlangtech-1.35/) (Prakash et al., DravidianLangTech 2026)
ACL