@inproceedings{kumari-2020-sonal,
title = "Sonal.kumari at {S}em{E}val-2020 Task 12: Social Media Multilingual Offensive Text Identification and Categorization Using Neural Network Models",
author = "Kumari, Sonal",
editor = "Herbelot, Aurelie and
Zhu, Xiaodan and
Palmer, Alexis and
Schneider, Nathan and
May, Jonathan and
Shutova, Ekaterina",
booktitle = "Proceedings of the Fourteenth Workshop on Semantic Evaluation",
month = dec,
year = "2020",
address = "Barcelona (online)",
publisher = "International Committee for Computational Linguistics",
url = "https://aclanthology.org/2020.semeval-1.285",
doi = "10.18653/v1/2020.semeval-1.285",
pages = "2146--2154",
abstract = "In this paper, we present our approaches and results for SemEval-2020 Task 12, Multilingual Offensive Language Identification in Social Media (OffensEval 2020). The OffensEval 2020 had three subtasks: A) Identifying the tweets to be offensive (OFF) or non-offensive (NOT) for Arabic, Danish, English, Greek, and Turkish languages, B) Detecting if the offensive tweet is targeted (TIN) or untargeted (UNT) for the English language, and C) Categorizing the offensive targeted tweets into three classes, namely: individual (IND), Group (GRP), or Other (OTH) for the English language. We participate in all the subtasks A, B, and C. In our solution, first we use the pre-trained BERT model for all subtasks, A, B, and C and then we apply the BiLSTM model with attention mechanism (Attn-BiLSTM) for the same. Our result demonstrates that the pre-trained model is not giving good results for all types of languages and is compute and memory intensive whereas the Attn-BiLSTM model is fast and gives good accuracy with fewer resources. The Attn-BiLSTM model is giving better accuracy for Arabic and Greek where the pre-trained model is not able to capture the complete context of these languages due to lower vocab-size.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kumari-2020-sonal">
<titleInfo>
<title>Sonal.kumari at SemEval-2020 Task 12: Social Media Multilingual Offensive Text Identification and Categorization Using Neural Network Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sonal</namePart>
<namePart type="family">Kumari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourteenth Workshop on Semantic Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aurelie</namePart>
<namePart type="family">Herbelot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaodan</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathan</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">May</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona (online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present our approaches and results for SemEval-2020 Task 12, Multilingual Offensive Language Identification in Social Media (OffensEval 2020). The OffensEval 2020 had three subtasks: A) Identifying the tweets to be offensive (OFF) or non-offensive (NOT) for Arabic, Danish, English, Greek, and Turkish languages, B) Detecting if the offensive tweet is targeted (TIN) or untargeted (UNT) for the English language, and C) Categorizing the offensive targeted tweets into three classes, namely: individual (IND), Group (GRP), or Other (OTH) for the English language. We participate in all the subtasks A, B, and C. In our solution, first we use the pre-trained BERT model for all subtasks, A, B, and C and then we apply the BiLSTM model with attention mechanism (Attn-BiLSTM) for the same. Our result demonstrates that the pre-trained model is not giving good results for all types of languages and is compute and memory intensive whereas the Attn-BiLSTM model is fast and gives good accuracy with fewer resources. The Attn-BiLSTM model is giving better accuracy for Arabic and Greek where the pre-trained model is not able to capture the complete context of these languages due to lower vocab-size.</abstract>
<identifier type="citekey">kumari-2020-sonal</identifier>
<identifier type="doi">10.18653/v1/2020.semeval-1.285</identifier>
<location>
<url>https://aclanthology.org/2020.semeval-1.285</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>2146</start>
<end>2154</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sonal.kumari at SemEval-2020 Task 12: Social Media Multilingual Offensive Text Identification and Categorization Using Neural Network Models
%A Kumari, Sonal
%Y Herbelot, Aurelie
%Y Zhu, Xiaodan
%Y Palmer, Alexis
%Y Schneider, Nathan
%Y May, Jonathan
%Y Shutova, Ekaterina
%S Proceedings of the Fourteenth Workshop on Semantic Evaluation
%D 2020
%8 December
%I International Committee for Computational Linguistics
%C Barcelona (online)
%F kumari-2020-sonal
%X In this paper, we present our approaches and results for SemEval-2020 Task 12, Multilingual Offensive Language Identification in Social Media (OffensEval 2020). The OffensEval 2020 had three subtasks: A) Identifying the tweets to be offensive (OFF) or non-offensive (NOT) for Arabic, Danish, English, Greek, and Turkish languages, B) Detecting if the offensive tweet is targeted (TIN) or untargeted (UNT) for the English language, and C) Categorizing the offensive targeted tweets into three classes, namely: individual (IND), Group (GRP), or Other (OTH) for the English language. We participate in all the subtasks A, B, and C. In our solution, first we use the pre-trained BERT model for all subtasks, A, B, and C and then we apply the BiLSTM model with attention mechanism (Attn-BiLSTM) for the same. Our result demonstrates that the pre-trained model is not giving good results for all types of languages and is compute and memory intensive whereas the Attn-BiLSTM model is fast and gives good accuracy with fewer resources. The Attn-BiLSTM model is giving better accuracy for Arabic and Greek where the pre-trained model is not able to capture the complete context of these languages due to lower vocab-size.
%R 10.18653/v1/2020.semeval-1.285
%U https://aclanthology.org/2020.semeval-1.285
%U https://doi.org/10.18653/v1/2020.semeval-1.285
%P 2146-2154
Markdown (Informal)
[Sonal.kumari at SemEval-2020 Task 12: Social Media Multilingual Offensive Text Identification and Categorization Using Neural Network Models](https://aclanthology.org/2020.semeval-1.285) (Kumari, SemEval 2020)
ACL