% ACL Anthology record 2026.abjadnlp-1.62 (workshop paper, pp. 493--496).
% Case-sensitive words in the titles are protected as whole words so that
% sentence-casing styles keep them intact without mid-word brace groups.
@inproceedings{francies-etal-2026-reglat,
  title     = {{REGLAT} at {AbjadGenEval}: Multi-Model Ensemble Approach for {Arabic} {AI}-Generated Text Detection},
  author    = {Francies, Mariam Labib and
               Ashraf, Nsrin and
               Fetouh, Ahmed Megahed and
               Nayel, Hamada},
  booktitle = {Proceedings of the 2nd Workshop on {NLP} for Languages Using {Arabic} Script},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.abjadnlp-1.62/},
  pages     = {493--496},
  abstract  = {The rapid advancement of large language models necessitates robust methods for detecting AI-generated Arabic text. This paper presents our system for distinguishing human-written from machine-generated Arabic content. We propose a weighted ensemble combining AraBERTv2 and BERT-base-arabic, trained via 5-fold stratified cross-validation with class-balanced loss functions. Our methodology incorporates Arabic text normalization, strategic data augmentation using 16,678 samples from external scientific abstracts, and threshold optimization prioritizing recall. On the official test set, our system achieved an F1-score of 0.763, an accuracy of 0.695, a precision of 0.624, and a recall of 0.980, demonstrating strong detection of machine-generated texts with minimal false negatives at the cost of elevated false positives. Analysis reveals critical insights into precision-recall trade-offs and challenges in cross-domain generalization for Arabic AI text detection.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, citekey francies-etal-2026-reglat. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="francies-etal-2026-reglat">

    <!-- Title of the paper itself -->
    <titleInfo>
      <title>REGLAT at AbjadGenEval: Multi-Model Ensemble Approach for Arabic AI-Generated Text Detection</title>
    </titleInfo>

    <!-- Authors, one <name> per person, in listed order -->
    <name type="personal">
      <namePart type="given">Mariam</namePart>
      <namePart type="given">Labib</namePart>
      <namePart type="family">Francies</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nsrin</namePart>
      <namePart type="family">Ashraf</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ahmed</namePart>
      <namePart type="given">Megahed</namePart>
      <namePart type="family">Fetouh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hamada</namePart>
      <namePart type="family">Nayel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <!-- Issue date of the paper (year-month) -->
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume that contains the paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>The rapid advancement of large language models necessitates robust methods for detecting AI-generated Arabic text. This paper presents our system for distinguishing human-written from machine-generated Arabic content. We propose a weighted ensemble combining AraBERTv2 and BERT-base-arabic, trained via 5-fold stratified cross-validation with class-balanced loss functions. Our methodology incorporates Arabic text normalization, strategic data augmentation using 16,678 samples from external scientific abstracts, and threshold optimization prioritizing recall. On the official test set, our system achieved an F1-score of 0.763, an accuracy of 0.695, a precision of 0.624, and a recall of 0.980, demonstrating strong detection of machine-generated texts with minimal false negatives at the cost of elevated false positives. Analysis reveals critical insights into precision-recall trade-offs and challenges in cross-domain generalization for Arabic AI text detection.</abstract>

    <!-- Identifiers and access location -->
    <identifier type="citekey">francies-etal-2026-reglat</identifier>
    <location>
      <url>https://aclanthology.org/2026.abjadnlp-1.62/</url>
    </location>

    <!-- Position of the paper inside the host: date and page extent -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>493</start>
        <end>496</end>
      </extent>
    </part>

  </mods>
</modsCollection>
%0 Conference Proceedings
%T REGLAT at AbjadGenEval: Multi-Model Ensemble Approach for Arabic AI-Generated Text Detection
%A Francies, Mariam Labib
%A Ashraf, Nsrin
%A Fetouh, Ahmed Megahed
%A Nayel, Hamada
%S Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F francies-etal-2026-reglat
%X The rapid advancement of large language models necessitates robust methods for detecting AI-generated Arabic text. This paper presents our system for distinguishing human-written from machine-generated Arabic content. We propose a weighted ensemble combining AraBERTv2 and BERT-base-arabic, trained via 5-fold stratified cross-validation with class-balanced loss functions. Our methodology incorporates Arabic text normalization, strategic data augmentation using 16,678 samples from external scientific abstracts, and threshold optimization prioritizing recall. On the official test set, our system achieved an F1-score of 0.763, an accuracy of 0.695, a precision of 0.624, and a recall of 0.980, demonstrating strong detection of machine-generated texts with minimal false negatives at the cost of elevated false positives. Analysis reveals critical insights into precision-recall trade-offs and challenges in cross-domain generalization for Arabic AI text detection.
%U https://aclanthology.org/2026.abjadnlp-1.62/
%P 493-496
Markdown (Informal)
[REGLAT at AbjadGenEval: Multi-Model Ensemble Approach for Arabic AI-Generated Text Detection](https://aclanthology.org/2026.abjadnlp-1.62/) (Francies et al., AbjadNLP 2026)
ACL