% Conference paper record (ACL Anthology, AbjadNLP 2026 workshop).
% Words that must keep their capitalisation under sentence-casing styles
% are protected with whole-word braces.
@inproceedings{alanazi-etal-2026-hybrid,
  title     = {A Hybrid Confidence-Aware Framework for {Arabic} Toxicity Detection in Social Media},
  author    = {Alanazi, Fawzia Zaal and
               Alamri, Asma Mohammed and
               Bin Saleh, Arwa and
               Alharbi, Abdullah I.},
  booktitle = {Proceedings of the 2nd Workshop on {NLP} for Languages Using {Arabic} Script},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.abjadnlp-1.42/},
  pages     = {364--370},
  abstract  = {Automatic detection of toxic and offensive content in Arabic social media is a challenging task due to rich morphology, dialectal variation, and noisy writing styles. While transformer-based language models have achieved strong performance, they often produce uncertain predictions in borderline cases. This paper presents a hybrid framework for Arabic toxicity detection that combines a pretrained Arabic-specific transformer model with a confidence-aware rule-based mechanism. The proposed approach activates automatically induced lexical rules only when the model prediction falls within a predefined gray zone of uncertainty, preserving neural dominance while improving robustness and interpretability. Experiments conducted on a manually annotated dataset of 35,000 Arabic posts demonstrate that the hybrid approach achieves consistent improvements over the baseline model, particularly in reducing false negatives for toxic content. The results indicate that selective rule activation is an effective strategy for enhancing reliability in real-world Arabic social media moderation systems.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier below. -->
  <mods ID="alanazi-etal-2026-hybrid">
    <titleInfo>
      <title>A Hybrid Confidence-Aware Framework for Arabic Toxicity Detection in Social Media</title>
    </titleInfo>

    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Fawzia</namePart>
      <namePart type="given">Zaal</namePart>
      <namePart type="family">Alanazi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Asma</namePart>
      <namePart type="given">Mohammed</namePart>
      <namePart type="family">Alamri</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Arwa</namePart>
      <namePart type="family">Bin Saleh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Abdullah</namePart>
      <!-- NOTE(review): initial is stored without a trailing period here; the other citation records in this file write "I." - confirm whether that is intended. -->
      <namePart type="given">I</namePart>
      <namePart type="family">Alharbi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume containing this paper. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>Automatic detection of toxic and offensive content in Arabic social media is a challenging task due to rich morphology, dialectal variation, and noisy writing styles. While transformer-based language models have achieved strong performance, they often produce uncertain predictions in borderline cases. This paper presents a hybrid framework for Arabic toxicity detection that combines a pretrained Arabic-specific transformer model with a confidence-aware rule-based mechanism. The proposed approach activates automatically induced lexical rules only when the model prediction falls within a predefined gray zone of uncertainty, preserving neural dominance while improving robustness and interpretability. Experiments conducted on a manually annotated dataset of 35,000 Arabic posts demonstrate that the hybrid approach achieves consistent improvements over the baseline model, particularly in reducing false negatives for toxic content. The results indicate that selective rule activation is an effective strategy for enhancing reliability in real-world Arabic social media moderation systems.</abstract>
    <identifier type="citekey">alanazi-etal-2026-hybrid</identifier>
    <location>
      <url>https://aclanthology.org/2026.abjadnlp-1.42/</url>
    </location>

    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>364</start>
        <end>370</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Hybrid Confidence-Aware Framework for Arabic Toxicity Detection in Social Media
%A Alanazi, Fawzia Zaal
%A Alamri, Asma Mohammed
%A Bin Saleh, Arwa
%A Alharbi, Abdullah I.
%S Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F alanazi-etal-2026-hybrid
%X Automatic detection of toxic and offensive content in Arabic social media is a challenging task due to rich morphology, dialectal variation, and noisy writing styles. While transformer-based language models have achieved strong performance, they often produce uncertain predictions in borderline cases. This paper presents a hybrid framework for Arabic toxicity detection that combines a pretrained Arabic-specific transformer model with a confidence-aware rule-based mechanism. The proposed approach activates automatically induced lexical rules only when the model prediction falls within a predefined gray zone of uncertainty, preserving neural dominance while improving robustness and interpretability. Experiments conducted on a manually annotated dataset of 35,000 Arabic posts demonstrate that the hybrid approach achieves consistent improvements over the baseline model, particularly in reducing false negatives for toxic content. The results indicate that selective rule activation is an effective strategy for enhancing reliability in real-world Arabic social media moderation systems.
%U https://aclanthology.org/2026.abjadnlp-1.42/
%P 364-370
Markdown (Informal)
[A Hybrid Confidence-Aware Framework for Arabic Toxicity Detection in Social Media](https://aclanthology.org/2026.abjadnlp-1.42/) (Alanazi et al., AbjadNLP 2026)
ACL