% BibTeX record for the paper published at https://aclanthology.org/2025.dravidianlangtech-1.70/
% Case-sensitive words in the title are protected with whole-word braces so that
% sentence-casing bibliography styles keep them intact.
@inproceedings{rahman-etal-2025-cuet,
    title = "{CUET\_Ignite@DravidianLangTech} 2025: Detection of Abusive Comments in {Tamil} Text Using Transformer Models",
    author = "Rahman, MD.Mahadi and
      Uddin, Mohammad Minhaj and
      Arefin, Mohammad Shamsul",
    editor = "Chakravarthi, Bharathi Raja and
      Priyadharshini, Ruba and
      Madasamy, Anand Kumar and
      Thavareesan, Sajeetha and
      Sherly, Elizabeth and
      Rajiakodi, Saranya and
      Palani, Balasubramanian and
      Subramanian, Malliga and
      Cn, Subalalitha and
      Chinnappa, Dhivya",
    booktitle = "Proceedings of the Fifth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages",
    month = may,
    year = "2025",
    address = "Acoma, The Albuquerque Convention Center, Albuquerque, New Mexico",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.dravidianlangtech-1.70/",
    doi = "10.18653/v1/2025.dravidianlangtech-1.70",
    pages = "392--397",
    isbn = "979-8-89176-228-2",
    abstract = "Abusive comment detection in low-resource languages is a challenging task particularly when addressing gender-based abuse. Identifying abusive language targeting women is crucial for effective content moderation and fostering safer online spaces. A shared task on abusive comment detection in Tamil text organized by DravidianLangTech@NAACL 2025 allowed us to address this challenge using a curated dataset. For this task, we experimented with various machine learning (ML) and deep learning (DL) models including Logistic Regression, Random Forest, SVM, CNN, LSTM, BiLSTM and transformer-based models such as mBERT, IndicBERT, XLM-RoBERTa and many more. The dataset consisted of Tamil YouTube comments annotated with binary labels, Abusive and NonAbusive, capturing explicit abuse, implicit biases and stereotypes. Our experiments demonstrated that XLM-RoBERTa achieved the highest macro F1-score (0.80), highlighting its effectiveness in handling Tamil text. This research contributes to advancing abusive language detection and natural language processing in low-resource languages particularly for addressing gender-based abuse online."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rahman-etal-2025-cuet">
<titleInfo>
<title>CUET_Ignite@DravidianLangTech 2025: Detection of Abusive Comments in Tamil Text Using Transformer Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">MD.Mahadi</namePart>
<namePart type="family">Rahman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Minhaj</namePart>
<namePart type="family">Uddin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Shamsul</namePart>
<namePart type="family">Arefin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="family">Priyadharshini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anand</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Madasamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sajeetha</namePart>
<namePart type="family">Thavareesan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Sherly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saranya</namePart>
<namePart type="family">Rajiakodi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Balasubramanian</namePart>
<namePart type="family">Palani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malliga</namePart>
<namePart type="family">Subramanian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Subalalitha</namePart>
<namePart type="family">Cn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dhivya</namePart>
<namePart type="family">Chinnappa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Acoma, The Albuquerque Convention Center, Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-228-2</identifier>
</relatedItem>
<abstract>Abusive comment detection in low-resource languages is a challenging task particularly when addressing gender-based abuse. Identifying abusive language targeting women is crucial for effective content moderation and fostering safer online spaces. A shared task on abusive comment detection in Tamil text organized by DravidianLangTech@NAACL 2025 allowed us to address this challenge using a curated dataset. For this task, we experimented with various machine learning (ML) and deep learning (DL) models including Logistic Regression, Random Forest, SVM, CNN, LSTM, BiLSTM and transformer-based models such as mBERT, IndicBERT, XLM-RoBERTa and many more. The dataset consisted of Tamil YouTube comments annotated with binary labels, Abusive and NonAbusive, capturing explicit abuse, implicit biases and stereotypes. Our experiments demonstrated that XLM-RoBERTa achieved the highest macro F1-score (0.80), highlighting its effectiveness in handling Tamil text. This research contributes to advancing abusive language detection and natural language processing in low-resource languages particularly for addressing gender-based abuse online.</abstract>
<identifier type="citekey">rahman-etal-2025-cuet</identifier>
<identifier type="doi">10.18653/v1/2025.dravidianlangtech-1.70</identifier>
<location>
<url>https://aclanthology.org/2025.dravidianlangtech-1.70/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>392</start>
<end>397</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CUET_Ignite@DravidianLangTech 2025: Detection of Abusive Comments in Tamil Text Using Transformer Models
%A Rahman, MD.Mahadi
%A Uddin, Mohammad Minhaj
%A Arefin, Mohammad Shamsul
%Y Chakravarthi, Bharathi Raja
%Y Priyadharshini, Ruba
%Y Madasamy, Anand Kumar
%Y Thavareesan, Sajeetha
%Y Sherly, Elizabeth
%Y Rajiakodi, Saranya
%Y Palani, Balasubramanian
%Y Subramanian, Malliga
%Y Cn, Subalalitha
%Y Chinnappa, Dhivya
%S Proceedings of the Fifth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages
%D 2025
%8 May
%I Association for Computational Linguistics
%C Acoma, The Albuquerque Convention Center, Albuquerque, New Mexico
%@ 979-8-89176-228-2
%F rahman-etal-2025-cuet
%X Abusive comment detection in low-resource languages is a challenging task particularly when addressing gender-based abuse. Identifying abusive language targeting women is crucial for effective content moderation and fostering safer online spaces. A shared task on abusive comment detection in Tamil text organized by DravidianLangTech@NAACL 2025 allowed us to address this challenge using a curated dataset. For this task, we experimented with various machine learning (ML) and deep learning (DL) models including Logistic Regression, Random Forest, SVM, CNN, LSTM, BiLSTM and transformer-based models such as mBERT, IndicBERT, XLM-RoBERTa and many more. The dataset consisted of Tamil YouTube comments annotated with binary labels, Abusive and NonAbusive, capturing explicit abuse, implicit biases and stereotypes. Our experiments demonstrated that XLM-RoBERTa achieved the highest macro F1-score (0.80), highlighting its effectiveness in handling Tamil text. This research contributes to advancing abusive language detection and natural language processing in low-resource languages particularly for addressing gender-based abuse online.
%R 10.18653/v1/2025.dravidianlangtech-1.70
%U https://aclanthology.org/2025.dravidianlangtech-1.70/
%U https://doi.org/10.18653/v1/2025.dravidianlangtech-1.70
%P 392-397
Markdown (Informal)
[CUET_Ignite@DravidianLangTech 2025: Detection of Abusive Comments in Tamil Text Using Transformer Models](https://aclanthology.org/2025.dravidianlangtech-1.70/) (Rahman et al., DravidianLangTech 2025)
ACL