@inproceedings{dave-etal-2021-irnlp,
title = "{IRNLP}{\_}{DAIICT}@{D}ravidian{L}ang{T}ech-{EACL}2021:Offensive Language identification in {D}ravidian Languages using {TF}-{IDF} Char N-grams and {M}u{RIL}",
author = "Dave, Bhargav and
Bhat, Shripad and
Majumder, Prasenjit",
editor = "Chakravarthi, Bharathi Raja and
Priyadharshini, Ruba and
Kumar M, Anand and
Krishnamurthy, Parameswari and
Sherly, Elizabeth",
booktitle = "Proceedings of the First Workshop on Speech and Language Technologies for Dravidian Languages",
month = apr,
year = "2021",
address = "Kyiv",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.dravidianlangtech-1.37",
pages = "266--269",
abstract = "This paper presents the participation of the IRNLP{\_}DAIICT team from the Information Retrieval and Natural Language Processing lab at DA-IICT, India in DravidianLangTech-EACL2021 Offensive Language identification in Dravidian Languages. The aim of this shared task is to identify Offensive Language from a code-mixed data-set of YouTube comments. The task is to classify comments into Not Offensive (NO), Offensive Untargeted (OU), Offensive Targeted Individual (OTI), Offensive Targeted Group (OTG), Offensive Targeted Others (OTO), Other Language (OL) for three Dravidian languages: Kannada, Malayalam and Tamil. We use TF-IDF character n-grams and pretrained MuRIL embeddings for text representation and Logistic Regression and Linear SVM for classification. Our best approach ranked Ninth, Third and Eighth with weighted F1 scores of 0.64, 0.95 and 0.71 in Kannada, Malayalam and Tamil, respectively, on the test dataset.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dave-etal-2021-irnlp">
<titleInfo>
<title>IRNLP_DAIICT@DravidianLangTech-EACL2021:Offensive Language identification in Dravidian Languages using TF-IDF Char N-grams and MuRIL</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bhargav</namePart>
<namePart type="family">Dave</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shripad</namePart>
<namePart type="family">Bhat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prasenjit</namePart>
<namePart type="family">Majumder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Speech and Language Technologies for Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="family">Priyadharshini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anand</namePart>
<namePart type="family">Kumar M</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Parameswari</namePart>
<namePart type="family">Krishnamurthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Sherly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Kyiv</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the participation of the IRNLP_DAIICT team from the Information Retrieval and Natural Language Processing lab at DA-IICT, India in DravidianLangTech-EACL2021 Offensive Language identification in Dravidian Languages. The aim of this shared task is to identify Offensive Language from a code-mixed data-set of YouTube comments. The task is to classify comments into Not Offensive (NO), Offensive Untargeted (OU), Offensive Targeted Individual (OTI), Offensive Targeted Group (OTG), Offensive Targeted Others (OTO), Other Language (OL) for three Dravidian languages: Kannada, Malayalam and Tamil. We use TF-IDF character n-grams and pretrained MuRIL embeddings for text representation and Logistic Regression and Linear SVM for classification. Our best approach ranked Ninth, Third and Eighth with weighted F1 scores of 0.64, 0.95 and 0.71 in Kannada, Malayalam and Tamil, respectively, on the test dataset.</abstract>
<identifier type="citekey">dave-etal-2021-irnlp</identifier>
<location>
<url>https://aclanthology.org/2021.dravidianlangtech-1.37</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>266</start>
<end>269</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T IRNLP_DAIICT@DravidianLangTech-EACL2021:Offensive Language identification in Dravidian Languages using TF-IDF Char N-grams and MuRIL
%A Dave, Bhargav
%A Bhat, Shripad
%A Majumder, Prasenjit
%Y Chakravarthi, Bharathi Raja
%Y Priyadharshini, Ruba
%Y Kumar M, Anand
%Y Krishnamurthy, Parameswari
%Y Sherly, Elizabeth
%S Proceedings of the First Workshop on Speech and Language Technologies for Dravidian Languages
%D 2021
%8 April
%I Association for Computational Linguistics
%C Kyiv
%F dave-etal-2021-irnlp
%X This paper presents the participation of the IRNLP_DAIICT team from the Information Retrieval and Natural Language Processing lab at DA-IICT, India in DravidianLangTech-EACL2021 Offensive Language identification in Dravidian Languages. The aim of this shared task is to identify Offensive Language from a code-mixed data-set of YouTube comments. The task is to classify comments into Not Offensive (NO), Offensive Untargeted (OU), Offensive Targeted Individual (OTI), Offensive Targeted Group (OTG), Offensive Targeted Others (OTO), Other Language (OL) for three Dravidian languages: Kannada, Malayalam and Tamil. We use TF-IDF character n-grams and pretrained MuRIL embeddings for text representation and Logistic Regression and Linear SVM for classification. Our best approach ranked Ninth, Third and Eighth with weighted F1 scores of 0.64, 0.95 and 0.71 in Kannada, Malayalam and Tamil, respectively, on the test dataset.
%U https://aclanthology.org/2021.dravidianlangtech-1.37
%P 266-269
Markdown (Informal)
[IRNLP_DAIICT@DravidianLangTech-EACL2021:Offensive Language identification in Dravidian Languages using TF-IDF Char N-grams and MuRIL](https://aclanthology.org/2021.dravidianlangtech-1.37) (Dave et al., DravidianLangTech 2021)
ACL