% Conference paper from the OSACT4 workshop proceedings (ACL Anthology ID 2020.osact-1.11).
% Acronyms and proper nouns are brace-protected as whole words so sentence-casing styles keep their capitals.
@inproceedings{saeed-etal-2020-osact4,
  title     = {{OSACT4} Shared Tasks: Ensembled Stacked Classification for Offensive and Hate Speech in {Arabic} Tweets},
  author    = {Saeed, Hafiz Hassaan and
               Calders, Toon and
               Kamiran, Faisal},
  editor    = {Al-Khalifa, Hend and
               Magdy, Walid and
               Darwish, Kareem and
               Elsayed, Tamer and
               Mubarak, Hamdy},
  booktitle = {Proceedings of the 4th Workshop on Open-Source {Arabic} Corpora and Processing Tools, with a Shared Task on Offensive Language Detection},
  month     = may,
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  url       = {https://aclanthology.org/2020.osact-1.11},
  pages     = {71--75},
  abstract  = {In this paper, we describe our submission for the OSACT4 2020 shared tasks on offensive language and hate speech detection in the Arabic language. Our solution builds upon combining a number of deep learning models using pre-trained word vectors. To improve the word representation and increase word coverage, we compare a number of existing pre-trained word embeddings and finally concatenate the two empirically best among them. To avoid under- as well as over-fitting, we train each deep model multiple times, and we include the optimization of the decision threshold into the training process. The predictions of the resulting models are then combined into a tuned ensemble by stacking a classifier on top of the predictions by these base models. We name our approach {``}ESOTP{''} (Ensembled Stacking classifier over Optimized Thresholded Predictions of multiple deep models). The resulting ESOTP-based system ranked 6th out of 35 on the shared task of Offensive Language detection (sub-task A) and 5th out of 30 on Hate Speech Detection (sub-task B).},
  language  = {English},
  isbn      = {979-10-95546-51-1},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="saeed-etal-2020-osact4">
<titleInfo>
<title>OSACT4 Shared Tasks: Ensembled Stacked Classification for Offensive and Hate Speech in Arabic Tweets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hafiz</namePart>
<namePart type="given">Hassaan</namePart>
<namePart type="family">Saeed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Toon</namePart>
<namePart type="family">Calders</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Faisal</namePart>
<namePart type="family">Kamiran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walid</namePart>
<namePart type="family">Magdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tamer</namePart>
<namePart type="family">Elsayed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hamdy</namePart>
<namePart type="family">Mubarak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-51-1</identifier>
</relatedItem>
<abstract>In this paper, we describe our submission for the OSACT4 2020 shared tasks on offensive language and hate speech detection in the Arabic language. Our solution builds upon combining a number of deep learning models using pre-trained word vectors. To improve the word representation and increase word coverage, we compare a number of existing pre-trained word embeddings and finally concatenate the two empirically best among them. To avoid under- as well as over-fitting, we train each deep model multiple times, and we include the optimization of the decision threshold into the training process. The predictions of the resulting models are then combined into a tuned ensemble by stacking a classifier on top of the predictions by these base models. We name our approach “ESOTP” (Ensembled Stacking classifier over Optimized Thresholded Predictions of multiple deep models). The resulting ESOTP-based system ranked 6th out of 35 on the shared task of Offensive Language detection (sub-task A) and 5th out of 30 on Hate Speech Detection (sub-task B).</abstract>
<identifier type="citekey">saeed-etal-2020-osact4</identifier>
<location>
<url>https://aclanthology.org/2020.osact-1.11</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>71</start>
<end>75</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T OSACT4 Shared Tasks: Ensembled Stacked Classification for Offensive and Hate Speech in Arabic Tweets
%A Saeed, Hafiz Hassaan
%A Calders, Toon
%A Kamiran, Faisal
%Y Al-Khalifa, Hend
%Y Magdy, Walid
%Y Darwish, Kareem
%Y Elsayed, Tamer
%Y Mubarak, Hamdy
%S Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-51-1
%G English
%F saeed-etal-2020-osact4
%X In this paper, we describe our submission for the OSACT4 2020 shared tasks on offensive language and hate speech detection in the Arabic language. Our solution builds upon combining a number of deep learning models using pre-trained word vectors. To improve the word representation and increase word coverage, we compare a number of existing pre-trained word embeddings and finally concatenate the two empirically best among them. To avoid under- as well as over-fitting, we train each deep model multiple times, and we include the optimization of the decision threshold into the training process. The predictions of the resulting models are then combined into a tuned ensemble by stacking a classifier on top of the predictions by these base models. We name our approach “ESOTP” (Ensembled Stacking classifier over Optimized Thresholded Predictions of multiple deep models). The resulting ESOTP-based system ranked 6th out of 35 on the shared task of Offensive Language detection (sub-task A) and 5th out of 30 on Hate Speech Detection (sub-task B).
%U https://aclanthology.org/2020.osact-1.11
%P 71-75
Markdown (Informal)
[OSACT4 Shared Tasks: Ensembled Stacked Classification for Offensive and Hate Speech in Arabic Tweets](https://aclanthology.org/2020.osact-1.11) (Saeed et al., OSACT 2020)
ACL