@inproceedings{sivalingam-thavareesan-2021-offtamil,
title = "{O}ff{T}amil@{D}ravidean{L}ang{T}ech-{EASL}2021: Offensive Language Identification in {T}amil Text",
author = "Sivalingam, Disne and
Thavareesan, Sajeetha",
editor = "Chakravarthi, Bharathi Raja and
Priyadharshini, Ruba and
Kumar M, Anand and
Krishnamurthy, Parameswari and
Sherly, Elizabeth",
booktitle = "Proceedings of the First Workshop on Speech and Language Technologies for Dravidian Languages",
month = apr,
year = "2021",
address = "Kyiv",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.dravidianlangtech-1.51",
pages = "346--351",
abstract = "In the last few decades, Code-Mixed Offensive texts are used penetratingly in social media posts. Social media platforms and online communities showed much interest on offensive text identification in recent years. Consequently, research community is also interested in identifying such content and also contributed to the development of corpora. Many publicly available corpora are there for research on identifying offensive text written in English language but rare for low resourced languages like Tamil. The first code-mixed offensive text for Dravidian languages are developed by shared task organizers which is used for this study. This study focused on offensive language identification on code-mixed low-resourced Dravidian language Tamil using four classifiers (Support Vector Machine, random forest, k- Nearest Neighbour and Naive Bayes) using chi{\^{}}2 feature selection technique along with BoW and TF-IDF feature representation techniques using different combinations of n-grams. This proposed model achieved an accuracy of 76.96{\%} while using linear SVM with TF-IDF feature representation technique.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sivalingam-thavareesan-2021-offtamil">
<titleInfo>
<title>OffTamil@DravideanLangTech-EASL2021: Offensive Language Identification in Tamil Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Disne</namePart>
<namePart type="family">Sivalingam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sajeetha</namePart>
<namePart type="family">Thavareesan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Speech and Language Technologies for Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="family">Priyadharshini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anand</namePart>
<namePart type="family">Kumar M</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Parameswari</namePart>
<namePart type="family">Krishnamurthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Sherly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Kyiv</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the last few decades, Code-Mixed Offensive texts are used penetratingly in social media posts. Social media platforms and online communities showed much interest on offensive text identification in recent years. Consequently, research community is also interested in identifying such content and also contributed to the development of corpora. Many publicly available corpora are there for research on identifying offensive text written in English language but rare for low resourced languages like Tamil. The first code-mixed offensive text for Dravidian languages are developed by shared task organizers which is used for this study. This study focused on offensive language identification on code-mixed low-resourced Dravidian language Tamil using four classifiers (Support Vector Machine, random forest, k- Nearest Neighbour and Naive Bayes) using chi\² feature selection technique along with BoW and TF-IDF feature representation techniques using different combinations of n-grams. This proposed model achieved an accuracy of 76.96% while using linear SVM with TF-IDF feature representation technique.</abstract>
<identifier type="citekey">sivalingam-thavareesan-2021-offtamil</identifier>
<location>
<url>https://aclanthology.org/2021.dravidianlangtech-1.51</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>346</start>
<end>351</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T OffTamil@DravideanLangTech-EASL2021: Offensive Language Identification in Tamil Text
%A Sivalingam, Disne
%A Thavareesan, Sajeetha
%Y Chakravarthi, Bharathi Raja
%Y Priyadharshini, Ruba
%Y Kumar M, Anand
%Y Krishnamurthy, Parameswari
%Y Sherly, Elizabeth
%S Proceedings of the First Workshop on Speech and Language Technologies for Dravidian Languages
%D 2021
%8 April
%I Association for Computational Linguistics
%C Kyiv
%F sivalingam-thavareesan-2021-offtamil
%X In the last few decades, Code-Mixed Offensive texts are used penetratingly in social media posts. Social media platforms and online communities showed much interest on offensive text identification in recent years. Consequently, research community is also interested in identifying such content and also contributed to the development of corpora. Many publicly available corpora are there for research on identifying offensive text written in English language but rare for low resourced languages like Tamil. The first code-mixed offensive text for Dravidian languages are developed by shared task organizers which is used for this study. This study focused on offensive language identification on code-mixed low-resourced Dravidian language Tamil using four classifiers (Support Vector Machine, random forest, k- Nearest Neighbour and Naive Bayes) using chi\² feature selection technique along with BoW and TF-IDF feature representation techniques using different combinations of n-grams. This proposed model achieved an accuracy of 76.96% while using linear SVM with TF-IDF feature representation technique.
%U https://aclanthology.org/2021.dravidianlangtech-1.51
%P 346-351
Markdown (Informal)
[OffTamil@DravideanLangTech-EASL2021: Offensive Language Identification in Tamil Text](https://aclanthology.org/2021.dravidianlangtech-1.51) (Sivalingam & Thavareesan, DravidianLangTech 2021)
ACL