% Conference paper entry (ACL Anthology ID 2022.dravidianlangtech-1.36).
% Case-sensitive tokens in the title are brace-protected as whole words so
% that sentence-casing bibliography styles keep their capitalisation.
@inproceedings{patankar-etal-2022-optimize,
  title     = {{Optimize\_Prime@DravidianLangTech-ACL2022}: Abusive Comment Detection in {Tamil}},
  author    = {Patankar, Shantanu and
               Gokhale, Omkar and
               Litake, Onkar and
               Mandke, Aditya and
               Kadam, Dipali},
  editor    = {Chakravarthi, Bharathi Raja and
               Priyadharshini, Ruba and
               Madasamy, Anand Kumar and
               Krishnamurthy, Parameswari and
               Sherly, Elizabeth and
               Mahesan, Sinnathamby},
  booktitle = {Proceedings of the Second Workshop on Speech and Language Technologies for Dravidian Languages},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.dravidianlangtech-1.36},
  doi       = {10.18653/v1/2022.dravidianlangtech-1.36},
  pages     = {235--239},
  abstract  = {This paper tries to address the problem of abusive comment detection in low-resource indic languages. Abusive comments are statements that are offensive to a person or a group of people. These comments are targeted toward individuals belonging to specific ethnicities, genders, caste, race, sexuality, etc. Abusive Comment Detection is a significant problem, especially with the recent rise in social media users. This paper presents the approach used by our team {---} Optimize{\_}Prime, in the ACL 2022 shared task {``}Abusive Comment Detection in Tamil.{''} This task detects and classifies YouTube comments in Tamil and Tamil-English Codemixed format into multiple categories. We have used three methods to optimize our results: Ensemble models, Recurrent Neural Networks, and Transformers. In the Tamil data, MuRIL and XLM-RoBERTA were our best performing models with a macro-averaged f1 score of 0.43. Furthermore, for the Code-mixed data, MuRIL and M-BERT provided sublime results, with a macro-averaged f1 score of 0.45.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, keyed by its citekey. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="patankar-etal-2022-optimize">
    <titleInfo>
      <title>Optimize_Prime@DravidianLangTech-ACL2022: Abusive Comment Detection in Tamil</title>
    </titleInfo>
    <!-- Contributors with role "author". -->
    <name type="personal">
      <namePart type="given">Shantanu</namePart>
      <namePart type="family">Patankar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Omkar</namePart>
      <namePart type="family">Gokhale</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Onkar</namePart>
      <namePart type="family">Litake</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Aditya</namePart>
      <namePart type="family">Mandke</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dipali</namePart>
      <namePart type="family">Kadam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with contributors in role "editor". -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop on Speech and Language Technologies for Dravidian Languages</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Bharathi</namePart>
        <namePart type="given">Raja</namePart>
        <namePart type="family">Chakravarthi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruba</namePart>
        <namePart type="family">Priyadharshini</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anand</namePart>
        <namePart type="given">Kumar</namePart>
        <namePart type="family">Madasamy</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Parameswari</namePart>
        <namePart type="family">Krishnamurthy</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Elizabeth</namePart>
        <namePart type="family">Sherly</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sinnathamby</namePart>
        <namePart type="family">Mahesan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dublin, Ireland</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper tries to address the problem of abusive comment detection in low-resource indic languages. Abusive comments are statements that are offensive to a person or a group of people. These comments are targeted toward individuals belonging to specific ethnicities, genders, caste, race, sexuality, etc. Abusive Comment Detection is a significant problem, especially with the recent rise in social media users. This paper presents the approach used by our team — Optimize_Prime, in the ACL 2022 shared task “Abusive Comment Detection in Tamil.” This task detects and classifies YouTube comments in Tamil and Tamil-English Codemixed format into multiple categories. We have used three methods to optimize our results: Ensemble models, Recurrent Neural Networks, and Transformers. In the Tamil data, MuRIL and XLM-RoBERTA were our best performing models with a macro-averaged f1 score of 0.43. Furthermore, for the Code-mixed data, MuRIL and M-BERT provided sublime results, with a macro-averaged f1 score of 0.45.</abstract>
    <!-- Identifiers and landing page. -->
    <identifier type="citekey">patankar-etal-2022-optimize</identifier>
    <identifier type="doi">10.18653/v1/2022.dravidianlangtech-1.36</identifier>
    <location>
      <url>https://aclanthology.org/2022.dravidianlangtech-1.36</url>
    </location>
    <!-- Date and page extent of this part. -->
    <part>
      <date>2022-05</date>
      <extent unit="page">
        <start>235</start>
        <end>239</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Optimize_Prime@DravidianLangTech-ACL2022: Abusive Comment Detection in Tamil
%A Patankar, Shantanu
%A Gokhale, Omkar
%A Litake, Onkar
%A Mandke, Aditya
%A Kadam, Dipali
%Y Chakravarthi, Bharathi Raja
%Y Priyadharshini, Ruba
%Y Madasamy, Anand Kumar
%Y Krishnamurthy, Parameswari
%Y Sherly, Elizabeth
%Y Mahesan, Sinnathamby
%S Proceedings of the Second Workshop on Speech and Language Technologies for Dravidian Languages
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F patankar-etal-2022-optimize
%X This paper tries to address the problem of abusive comment detection in low-resource indic languages. Abusive comments are statements that are offensive to a person or a group of people. These comments are targeted toward individuals belonging to specific ethnicities, genders, caste, race, sexuality, etc. Abusive Comment Detection is a significant problem, especially with the recent rise in social media users. This paper presents the approach used by our team — Optimize_Prime, in the ACL 2022 shared task “Abusive Comment Detection in Tamil.” This task detects and classifies YouTube comments in Tamil and Tamil-English Codemixed format into multiple categories. We have used three methods to optimize our results: Ensemble models, Recurrent Neural Networks, and Transformers. In the Tamil data, MuRIL and XLM-RoBERTA were our best performing models with a macro-averaged f1 score of 0.43. Furthermore, for the Code-mixed data, MuRIL and M-BERT provided sublime results, with a macro-averaged f1 score of 0.45.
%R 10.18653/v1/2022.dravidianlangtech-1.36
%U https://aclanthology.org/2022.dravidianlangtech-1.36
%U https://doi.org/10.18653/v1/2022.dravidianlangtech-1.36
%P 235-239
Markdown (Informal)
[Optimize_Prime@DravidianLangTech-ACL2022: Abusive Comment Detection in Tamil](https://aclanthology.org/2022.dravidianlangtech-1.36) (Patankar et al., DravidianLangTech 2022)
ACL