@inproceedings{al-mandhari-etal-2024-offensive,
title = "Is it Offensive or Abusive? An Empirical Study of Hateful Language Detection of {A}rabic Social Media Texts",
author = "Al Mandhari, Salim and
El-Haj, Mo and
Rayson, Paul",
editor = "Mitkov, Ruslan and
Ezzini, Saad and
Ranasinghe, Tharindu and
Ezeani, Ignatius and
Khallaf, Nouran and
Acarturk, Cengiz and
Bradbury, Matthew and
El-Haj, Mo and
Rayson, Paul",
booktitle = "Proceedings of the First International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security",
month = jul,
year = "2024",
address = "Lancaster, UK",
publisher = "International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security",
url = "https://aclanthology.org/2024.nlpaics-1.16/",
pages = "137--146",
abstract = {Among many potential subjects studied in Sentiment Analysis, widespread offensive and abusive language on social media has triggered interest in reducing its risks on users; children in particular. This paper centres on distinguishing between offensive and abusive language detection within Arabic social media texts through the employment of various machine and deep learning techniques. The techniques include Na{\"i}ve Bayes (NB), Support Vector Machine (SVM), fastText, keras, and RoBERTa XML multilingual embeddings, which have demonstrated superior performance compared to other statistical machine learning methods and different kinds of embeddings like fastText. The methods were implemented on two separate corpora from YouTube comments totalling 47K comments. The results demonstrated that all models, except NB, reached an accuracy of 82{\%}. It was also shown that word tri-grams enhance classification performance, though other tuning techniques were applied such as TF-IDF and grid-search. The linguistic findings, aimed at distinguishing between offensive and abusive language, were consistent with machine learning (ML) performance, which effectively classified the two distinct classes of sentiment: offensive and abusive.}
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="al-mandhari-etal-2024-offensive">
<titleInfo>
<title>Is it Offensive or Abusive? An Empirical Study of Hateful Language Detection of Arabic Social Media Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Salim</namePart>
<namePart type="family">Al Mandhari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mo</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Rayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saad</namePart>
<namePart type="family">Ezzini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tharindu</namePart>
<namePart type="family">Ranasinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ignatius</namePart>
<namePart type="family">Ezeani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nouran</namePart>
<namePart type="family">Khallaf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cengiz</namePart>
<namePart type="family">Acarturk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Bradbury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mo</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Rayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security</publisher>
<place>
<placeTerm type="text">Lancaster, UK</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Among many potential subjects studied in Sentiment Analysis, widespread offensive and abusive language on social media has triggered interest in reducing its risks on users; children in particular. This paper centres on distinguishing between offensive and abusive language detection within Arabic social media texts through the employment of various machine and deep learning techniques. The techniques include Naïve Bayes (NB), Support Vector Machine (SVM), fastText, keras, and RoBERTa XML multilingual embeddings, which have demonstrated superior performance compared to other statistical machine learning methods and different kinds of embeddings like fastText. The methods were implemented on two separate corpora from YouTube comments totalling 47K comments. The results demonstrated that all models, except NB, reached an accuracy of 82%. It was also shown that word tri-grams enhance classification performance, though other tuning techniques were applied such as TF-IDF and grid-search. The linguistic findings, aimed at distinguishing between offensive and abusive language, were consistent with machine learning (ML) performance, which effectively classified the two distinct classes of sentiment: offensive and abusive.</abstract>
<identifier type="citekey">al-mandhari-etal-2024-offensive</identifier>
<location>
<url>https://aclanthology.org/2024.nlpaics-1.16/</url>
</location>
<part>
<date>2024-07</date>
<extent unit="page">
<start>137</start>
<end>146</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Is it Offensive or Abusive? An Empirical Study of Hateful Language Detection of Arabic Social Media Texts
%A Al Mandhari, Salim
%A El-Haj, Mo
%A Rayson, Paul
%Y Mitkov, Ruslan
%Y Ezzini, Saad
%Y Ranasinghe, Tharindu
%Y Ezeani, Ignatius
%Y Khallaf, Nouran
%Y Acarturk, Cengiz
%Y Bradbury, Matthew
%Y El-Haj, Mo
%Y Rayson, Paul
%S Proceedings of the First International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security
%D 2024
%8 July
%I International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security
%C Lancaster, UK
%F al-mandhari-etal-2024-offensive
%X Among many potential subjects studied in Sentiment Analysis, widespread offensive and abusive language on social media has triggered interest in reducing its risks on users; children in particular. This paper centres on distinguishing between offensive and abusive language detection within Arabic social media texts through the employment of various machine and deep learning techniques. The techniques include Naïve Bayes (NB), Support Vector Machine (SVM), fastText, keras, and RoBERTa XML multilingual embeddings, which have demonstrated superior performance compared to other statistical machine learning methods and different kinds of embeddings like fastText. The methods were implemented on two separate corpora from YouTube comments totalling 47K comments. The results demonstrated that all models, except NB, reached an accuracy of 82%. It was also shown that word tri-grams enhance classification performance, though other tuning techniques were applied such as TF-IDF and grid-search. The linguistic findings, aimed at distinguishing between offensive and abusive language, were consistent with machine learning (ML) performance, which effectively classified the two distinct classes of sentiment: offensive and abusive.
%U https://aclanthology.org/2024.nlpaics-1.16/
%P 137-146
Markdown (Informal)
[Is it Offensive or Abusive? An Empirical Study of Hateful Language Detection of Arabic Social Media Texts](https://aclanthology.org/2024.nlpaics-1.16/) (Al Mandhari et al., NLPAICS 2024)
ACL