@inproceedings{nawaz-etal-2026-ayahverse,
  title     = {{AyahVerse} at {AbjadGenEval} Shared Task: Monolingual Precision and Cross-Lingual Analysis in {Perso-Arabic} {AI} Detection},
  author    = {Nawaz, Fizza and
               Rashid, Ibad-ur-Rehman and
               Abid, Uswa and
               Hussain, Junaid},
  booktitle = {Proceedings of the 2nd Workshop on {NLP} for Languages Using {Arabic} Script},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.abjadnlp-1.63/},
  pages     = {497--505},
  abstract  = {This paper presents our submission to the AbjadGenEval shared task on AI-generated text detection in Arabic and Urdu. To address the challenges of morphologically rich and low-resource environments, we developed a composite framework leveraging monolingual specialists (AraBERTv2, CAMeLBERT-DA) and multilingual transformers. Our system achieved robust in-domain performance with Test F1-scores of 0.75 for Arabic and 0.86 for Urdu. Methodologically, we tested both raw and normalized text to distinguish whether models detect based on semantic content or on surface artifacts such as punctuation and formatting patterns. Furthermore, our cross-lingual investigations reveal directional performance differences, where Urdu-trained models achieve 0.75 F1 on Arabic, while Arabic-trained models achieve only 0.61 F1 on Urdu. Despite this difference, both directions maintained notably high recall for the machine class, indicating that the model learns cross-lingual machine detection patterns across the Perso-Arabic script. Finally, transfer performance collapsed when internal layers were frozen, demonstrating that full fine-tuning is essential for cross-lingual detection. However, the observed performance differences may partly reflect data imbalance rather than purely linguistic factors.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nawaz-etal-2026-ayahverse">
<titleInfo>
<title>AyahVerse at AbjadGenEval Shared Task: Monolingual Precision and Cross-Lingual Analysis in Perso-Arabic AI Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Fizza</namePart>
<namePart type="family">Nawaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ibad-ur-Rehman</namePart>
<namePart type="family">Rashid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Uswa</namePart>
<namePart type="family">Abid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junaid</namePart>
<namePart type="family">Hussain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents our submission to the AbjadGenEval shared task on AI-generated text detection in Arabic and Urdu. To address the challenges of morphologically rich and low-resource environments, we developed a composite framework leveraging monolingual specialists (AraBERTv2, CAMeLBERT-DA) and multilingual transformers. Our system achieved robust in-domain performance with Test F1-scores of 0.75 for Arabic and 0.86 for Urdu. Methodologically, we tested both raw and normalized text to distinguish whether models detect based on semantic content or on surface artifacts such as punctuation and formatting patterns. Furthermore, our cross-lingual investigations reveal directional performance differences, where Urdu-trained models achieve 0.75 F1 on Arabic, while Arabic-trained models achieve only 0.61 F1 on Urdu. Despite this difference, both directions maintained notably high recall for the machine class, indicating that the model learns cross-lingual machine detection patterns across the Perso-Arabic script. Finally, transfer performance collapsed when internal layers were frozen, demonstrating that full fine-tuning is essential for cross-lingual detection. However, the observed performance differences may partly reflect data imbalance rather than purely linguistic factors.</abstract>
<identifier type="citekey">nawaz-etal-2026-ayahverse</identifier>
<location>
<url>https://aclanthology.org/2026.abjadnlp-1.63/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>497</start>
<end>505</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AyahVerse at AbjadGenEval Shared Task: Monolingual Precision and Cross-Lingual Analysis in Perso-Arabic AI Detection
%A Nawaz, Fizza
%A Rashid, Ibad-ur-Rehman
%A Abid, Uswa
%A Hussain, Junaid
%S Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F nawaz-etal-2026-ayahverse
%X This paper presents our submission to the AbjadGenEval shared task on AI-generated text detection in Arabic and Urdu. To address the challenges of morphologically rich and low-resource environments, we developed a composite framework leveraging monolingual specialists (AraBERTv2, CAMeLBERT-DA) and multilingual transformers. Our system achieved robust in-domain performance with Test F1-scores of 0.75 for Arabic and 0.86 for Urdu. Methodologically, we tested both raw and normalized text to distinguish whether models detect based on semantic content or on surface artifacts such as punctuation and formatting patterns. Furthermore, our cross-lingual investigations reveal directional performance differences, where Urdu-trained models achieve 0.75 F1 on Arabic, while Arabic-trained models achieve only 0.61 F1 on Urdu. Despite this difference, both directions maintained notably high recall for the machine class, indicating that the model learns cross-lingual machine detection patterns across the Perso-Arabic script. Finally, transfer performance collapsed when internal layers were frozen, demonstrating that full fine-tuning is essential for cross-lingual detection. However, the observed performance differences may partly reflect data imbalance rather than purely linguistic factors.
%U https://aclanthology.org/2026.abjadnlp-1.63/
%P 497-505
Markdown (Informal)
[AyahVerse at AbjadGenEval Shared Task: Monolingual Precision and Cross-Lingual Analysis in Perso-Arabic AI Detection](https://aclanthology.org/2026.abjadnlp-1.63/) (Nawaz et al., AbjadNLP 2026)
ACL