@inproceedings{ibrahim-etal-2020-alexu,
title = "{A}lex{U}-{B}ack{T}ranslation-{TL} at {S}em{E}val-2020 Task 12: Improving Offensive Language Detection Using Data Augmentation and Transfer Learning",
author = "Ibrahim, Mai and
Torki, Marwan and
El-Makky, Nagwa",
editor = "Herbelot, Aurelie and
Zhu, Xiaodan and
Palmer, Alexis and
Schneider, Nathan and
May, Jonathan and
Shutova, Ekaterina",
booktitle = "Proceedings of the Fourteenth Workshop on Semantic Evaluation",
month = dec,
year = "2020",
address = "Barcelona (online)",
publisher = "International Committee for Computational Linguistics",
url = "https://aclanthology.org/2020.semeval-1.248",
doi = "10.18653/v1/2020.semeval-1.248",
pages = "1881--1890",
abstract = "Social media platforms, online news commenting spaces, and many other public forums have become widely known for issues of abusive behavior such as cyber-bullying and personal attacks. In this paper, we use the annotated tweets of the Offensive Language Identification Dataset (OLID) to train three levels of deep learning classifiers to solve the three sub-tasks associated with the dataset. Sub-task A is to determine if the tweet is toxic or not. Then, for offensive tweets, sub-task B requires determining whether the toxicity is targeted. Finally, for sub-task C, we predict the target of the offense; i.e. a group, individual, or other entity. In our solution, we tackle the problem of class imbalance in the dataset by using back translation for data augmentation and utilizing the fine-tuned BERT model in an ensemble of deep learning classifiers. We used this solution to participate in the three English sub-tasks of SemEval-2020 task 12. The proposed solution achieved 0.91393, 0.6300, and 0.57607 macro F1-average in sub-tasks A, B, and C respectively. We achieved the 9th, 14th, and 22nd places for sub-tasks A, B and C respectively.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ibrahim-etal-2020-alexu">
<titleInfo>
<title>AlexU-BackTranslation-TL at SemEval-2020 Task 12: Improving Offensive Language Detection Using Data Augmentation and Transfer Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mai</namePart>
<namePart type="family">Ibrahim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marwan</namePart>
<namePart type="family">Torki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nagwa</namePart>
<namePart type="family">El-Makky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourteenth Workshop on Semantic Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aurelie</namePart>
<namePart type="family">Herbelot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaodan</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathan</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">May</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona (online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Social media platforms, online news commenting spaces, and many other public forums have become widely known for issues of abusive behavior such as cyber-bullying and personal attacks. In this paper, we use the annotated tweets of the Offensive Language Identification Dataset (OLID) to train three levels of deep learning classifiers to solve the three sub-tasks associated with the dataset. Sub-task A is to determine if the tweet is toxic or not. Then, for offensive tweets, sub-task B requires determining whether the toxicity is targeted. Finally, for sub-task C, we predict the target of the offense; i.e. a group, individual, or other entity. In our solution, we tackle the problem of class imbalance in the dataset by using back translation for data augmentation and utilizing the fine-tuned BERT model in an ensemble of deep learning classifiers. We used this solution to participate in the three English sub-tasks of SemEval-2020 task 12. The proposed solution achieved 0.91393, 0.6300, and 0.57607 macro F1-average in sub-tasks A, B, and C respectively. We achieved the 9th, 14th, and 22nd places for sub-tasks A, B and C respectively.</abstract>
<identifier type="citekey">ibrahim-etal-2020-alexu</identifier>
<identifier type="doi">10.18653/v1/2020.semeval-1.248</identifier>
<location>
<url>https://aclanthology.org/2020.semeval-1.248</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>1881</start>
<end>1890</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AlexU-BackTranslation-TL at SemEval-2020 Task 12: Improving Offensive Language Detection Using Data Augmentation and Transfer Learning
%A Ibrahim, Mai
%A Torki, Marwan
%A El-Makky, Nagwa
%Y Herbelot, Aurelie
%Y Zhu, Xiaodan
%Y Palmer, Alexis
%Y Schneider, Nathan
%Y May, Jonathan
%Y Shutova, Ekaterina
%S Proceedings of the Fourteenth Workshop on Semantic Evaluation
%D 2020
%8 December
%I International Committee for Computational Linguistics
%C Barcelona (online)
%F ibrahim-etal-2020-alexu
%X Social media platforms, online news commenting spaces, and many other public forums have become widely known for issues of abusive behavior such as cyber-bullying and personal attacks. In this paper, we use the annotated tweets of the Offensive Language Identification Dataset (OLID) to train three levels of deep learning classifiers to solve the three sub-tasks associated with the dataset. Sub-task A is to determine if the tweet is toxic or not. Then, for offensive tweets, sub-task B requires determining whether the toxicity is targeted. Finally, for sub-task C, we predict the target of the offense; i.e. a group, individual, or other entity. In our solution, we tackle the problem of class imbalance in the dataset by using back translation for data augmentation and utilizing the fine-tuned BERT model in an ensemble of deep learning classifiers. We used this solution to participate in the three English sub-tasks of SemEval-2020 task 12. The proposed solution achieved 0.91393, 0.6300, and 0.57607 macro F1-average in sub-tasks A, B, and C respectively. We achieved the 9th, 14th, and 22nd places for sub-tasks A, B and C respectively.
%R 10.18653/v1/2020.semeval-1.248
%U https://aclanthology.org/2020.semeval-1.248
%U https://doi.org/10.18653/v1/2020.semeval-1.248
%P 1881-1890
Markdown (Informal)
[AlexU-BackTranslation-TL at SemEval-2020 Task 12: Improving Offensive Language Detection Using Data Augmentation and Transfer Learning](https://aclanthology.org/2020.semeval-1.248) (Ibrahim et al., SemEval 2020)
ACL