@inproceedings{huang-bai-2021-hub,
title = "{HUB}@{D}ravidian{L}ang{T}ech-{EACL}2021: Identify and Classify Offensive Text in Multilingual Code Mixing in Social Media",
author = "Huang, Bo and
Bai, Yang",
editor = "Chakravarthi, Bharathi Raja and
Priyadharshini, Ruba and
Kumar M, Anand and
Krishnamurthy, Parameswari and
Sherly, Elizabeth",
booktitle = "Proceedings of the First Workshop on Speech and Language Technologies for Dravidian Languages",
month = apr,
year = "2021",
address = "Kyiv",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.dravidianlangtech-1.27",
pages = "203--209",
abstract = "This paper introduces the system description of the HUB team participating in DravidianLangTech - EACL2021: Offensive Language Identification in Dravidian Languages. The theme of this shared task is the detection of offensive content in social media. Among the known tasks related to offensive speech detection, this is the first task to detect offensive comments posted in social media comments in the Dravidian language. The task organizer team provided us with the code-mixing task data set mainly composed of three different languages: Malayalam, Kannada, and Tamil. The tasks on the code mixed data in these three different languages can be seen as three different comment/post-level classification tasks. The task on the Malayalam data set is a five-category classification task, and the Kannada and Tamil language data sets are two six-category classification tasks. Based on our analysis of the task description and task data set, we chose to use the multilingual BERT model to complete this task. In this paper, we will discuss our fine-tuning methods, models, experiments, and results.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="huang-bai-2021-hub">
<titleInfo>
<title>HUB@DravidianLangTech-EACL2021: Identify and Classify Offensive Text in Multilingual Code Mixing in Social Media</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bo</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Bai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Speech and Language Technologies for Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="family">Priyadharshini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anand</namePart>
<namePart type="family">Kumar M</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Parameswari</namePart>
<namePart type="family">Krishnamurthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Sherly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Kyiv</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper introduces the system description of the HUB team participating in DravidianLangTech - EACL2021: Offensive Language Identification in Dravidian Languages. The theme of this shared task is the detection of offensive content in social media. Among the known tasks related to offensive speech detection, this is the first task to detect offensive comments posted in social media comments in the Dravidian language. The task organizer team provided us with the code-mixing task data set mainly composed of three different languages: Malayalam, Kannada, and Tamil. The tasks on the code mixed data in these three different languages can be seen as three different comment/post-level classification tasks. The task on the Malayalam data set is a five-category classification task, and the Kannada and Tamil language data sets are two six-category classification tasks. Based on our analysis of the task description and task data set, we chose to use the multilingual BERT model to complete this task. In this paper, we will discuss our fine-tuning methods, models, experiments, and results.</abstract>
<identifier type="citekey">huang-bai-2021-hub</identifier>
<location>
<url>https://aclanthology.org/2021.dravidianlangtech-1.27</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>203</start>
<end>209</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HUB@DravidianLangTech-EACL2021: Identify and Classify Offensive Text in Multilingual Code Mixing in Social Media
%A Huang, Bo
%A Bai, Yang
%Y Chakravarthi, Bharathi Raja
%Y Priyadharshini, Ruba
%Y Kumar M, Anand
%Y Krishnamurthy, Parameswari
%Y Sherly, Elizabeth
%S Proceedings of the First Workshop on Speech and Language Technologies for Dravidian Languages
%D 2021
%8 April
%I Association for Computational Linguistics
%C Kyiv
%F huang-bai-2021-hub
%X This paper introduces the system description of the HUB team participating in DravidianLangTech - EACL2021: Offensive Language Identification in Dravidian Languages. The theme of this shared task is the detection of offensive content in social media. Among the known tasks related to offensive speech detection, this is the first task to detect offensive comments posted in social media comments in the Dravidian language. The task organizer team provided us with the code-mixing task data set mainly composed of three different languages: Malayalam, Kannada, and Tamil. The tasks on the code mixed data in these three different languages can be seen as three different comment/post-level classification tasks. The task on the Malayalam data set is a five-category classification task, and the Kannada and Tamil language data sets are two six-category classification tasks. Based on our analysis of the task description and task data set, we chose to use the multilingual BERT model to complete this task. In this paper, we will discuss our fine-tuning methods, models, experiments, and results.
%U https://aclanthology.org/2021.dravidianlangtech-1.27
%P 203-209
Markdown (Informal)
[HUB@DravidianLangTech-EACL2021: Identify and Classify Offensive Text in Multilingual Code Mixing in Social Media](https://aclanthology.org/2021.dravidianlangtech-1.27) (Huang & Bai, DravidianLangTech 2021)
ACL