@inproceedings{rajalakshmi-etal-2022-dlrg,
title = "{DLRG}@{D}ravidian{L}ang{T}ech-{ACL}2022: Abusive Comment Detection in {T}amil using Multilingual Transformer Models",
author = "Rajalakshmi, Ratnavel and
Duraphe, Ankita and
Shibani, Antonette",
editor = "Chakravarthi, Bharathi Raja and
Priyadharshini, Ruba and
Madasamy, Anand Kumar and
Krishnamurthy, Parameswari and
Sherly, Elizabeth and
Mahesan, Sinnathamby",
booktitle = "Proceedings of the Second Workshop on Speech and Language Technologies for Dravidian Languages",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.dravidianlangtech-1.32",
doi = "10.18653/v1/2022.dravidianlangtech-1.32",
pages = "207--213",
abstract = "Online Social Network has let people to connect and interact with each other. It does, however, also provide a platform for online abusers to propagate abusive content. The vast majority of abusive remarks are written in a multilingual style, which allows them to easily slip past internet inspection. This paper presents a system developed for the Shared Task on Abusive Comment Detection (Misogyny, Misandry, Homophobia, Transphobic, Xenophobia, CounterSpeech, Hope Speech) in Tamil DravidianLangTech@ACL 2022 to detect the abusive category of each comment. We approach the task with three methodologies - Machine Learning, Deep Learning and Transformer-based modeling, for two sets of data - Tamil and Tamil+English language dataset. The dataset used in our system can be accessed from the competition on CodaLab. For Machine Learning, eight algorithms were implemented, among which Random Forest gave the best result with Tamil+English dataset, with a weighted average F1-score of 0.78. For Deep Learning, Bi-Directional LSTM gave best result with pre-trained word embeddings. In Transformer-based modeling, we used IndicBERT and mBERT with fine-tuning, among which mBERT gave the best result for Tamil dataset with a weighted average F1-score of 0.7.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rajalakshmi-etal-2022-dlrg">
<titleInfo>
<title>DLRG@DravidianLangTech-ACL2022: Abusive Comment Detection in Tamil using Multilingual Transformer Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ratnavel</namePart>
<namePart type="family">Rajalakshmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ankita</namePart>
<namePart type="family">Duraphe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonette</namePart>
<namePart type="family">Shibani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Speech and Language Technologies for Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="family">Priyadharshini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anand</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Madasamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Parameswari</namePart>
<namePart type="family">Krishnamurthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Sherly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sinnathamby</namePart>
<namePart type="family">Mahesan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Online Social Network has let people to connect and interact with each other. It does, however, also provide a platform for online abusers to propagate abusive content. The vast majority of abusive remarks are written in a multilingual style, which allows them to easily slip past internet inspection. This paper presents a system developed for the Shared Task on Abusive Comment Detection (Misogyny, Misandry, Homophobia, Transphobic, Xenophobia, CounterSpeech, Hope Speech) in Tamil DravidianLangTech@ACL 2022 to detect the abusive category of each comment. We approach the task with three methodologies - Machine Learning, Deep Learning and Transformer-based modeling, for two sets of data - Tamil and Tamil+English language dataset. The dataset used in our system can be accessed from the competition on CodaLab. For Machine Learning, eight algorithms were implemented, among which Random Forest gave the best result with Tamil+English dataset, with a weighted average F1-score of 0.78. For Deep Learning, Bi-Directional LSTM gave best result with pre-trained word embeddings. In Transformer-based modeling, we used IndicBERT and mBERT with fine-tuning, among which mBERT gave the best result for Tamil dataset with a weighted average F1-score of 0.7.</abstract>
<identifier type="citekey">rajalakshmi-etal-2022-dlrg</identifier>
<identifier type="doi">10.18653/v1/2022.dravidianlangtech-1.32</identifier>
<location>
<url>https://aclanthology.org/2022.dravidianlangtech-1.32</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>207</start>
<end>213</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DLRG@DravidianLangTech-ACL2022: Abusive Comment Detection in Tamil using Multilingual Transformer Models
%A Rajalakshmi, Ratnavel
%A Duraphe, Ankita
%A Shibani, Antonette
%Y Chakravarthi, Bharathi Raja
%Y Priyadharshini, Ruba
%Y Madasamy, Anand Kumar
%Y Krishnamurthy, Parameswari
%Y Sherly, Elizabeth
%Y Mahesan, Sinnathamby
%S Proceedings of the Second Workshop on Speech and Language Technologies for Dravidian Languages
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F rajalakshmi-etal-2022-dlrg
%X Online Social Network has let people to connect and interact with each other. It does, however, also provide a platform for online abusers to propagate abusive content. The vast majority of abusive remarks are written in a multilingual style, which allows them to easily slip past internet inspection. This paper presents a system developed for the Shared Task on Abusive Comment Detection (Misogyny, Misandry, Homophobia, Transphobic, Xenophobia, CounterSpeech, Hope Speech) in Tamil DravidianLangTech@ACL 2022 to detect the abusive category of each comment. We approach the task with three methodologies - Machine Learning, Deep Learning and Transformer-based modeling, for two sets of data - Tamil and Tamil+English language dataset. The dataset used in our system can be accessed from the competition on CodaLab. For Machine Learning, eight algorithms were implemented, among which Random Forest gave the best result with Tamil+English dataset, with a weighted average F1-score of 0.78. For Deep Learning, Bi-Directional LSTM gave best result with pre-trained word embeddings. In Transformer-based modeling, we used IndicBERT and mBERT with fine-tuning, among which mBERT gave the best result for Tamil dataset with a weighted average F1-score of 0.7.
%R 10.18653/v1/2022.dravidianlangtech-1.32
%U https://aclanthology.org/2022.dravidianlangtech-1.32
%U https://doi.org/10.18653/v1/2022.dravidianlangtech-1.32
%P 207-213
Markdown (Informal)
[DLRG@DravidianLangTech-ACL2022: Abusive Comment Detection in Tamil using Multilingual Transformer Models](https://aclanthology.org/2022.dravidianlangtech-1.32) (Rajalakshmi et al., DravidianLangTech 2022)
ACL