@inproceedings{zain-2026-lorad,
title = "{L}o{RAD}: Low-Resource {AI}-Generated Text Detection with {XLM}-{R}o{BERT}a",
author = "Zain, Ali",
booktitle = "Proceedings of the 2nd Workshop on {NLP} for Languages Using {A}rabic Script",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.abjadnlp-1.57/",
pages = "468--471",
abstract = "This paper describes our system submitted to the AbjadGenEval Shared Task at ArabicNLP 2026, which focuses on binary classification of human-written versus machine-generated text in low-resource languages. We participated in two independent subtasks targeting Arabic and Urdu news and literary texts. Our approach relies exclusively on fine-tuning XLM-RoBERTa, a multilingual Transformer-based model, under carefully controlled training and preprocessing settings. While the same model architecture was used for both subtasks, language-specific data handling strategies were applied based on empirical observations. The proposed system achieved first place in the Urdu subtask and third place in the Arabic subtask according to the official evaluation. These results demonstrate that multilingual pretrained models can serve as strong and reliable systems for AI-generated text detection across diverse languages."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zain-2026-lorad">
    <titleInfo>
      <title>LoRAD: Low-Resource AI-Generated Text Detection with XLM-RoBERTa</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Ali</namePart>
      <namePart type="family">Zain</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper describes our system submitted to the AbjadGenEval Shared Task at ArabicNLP 2026, which focuses on binary classification of human-written versus machine-generated text in low-resource languages. We participated in two independent subtasks targeting Arabic and Urdu news and literary texts. Our approach relies exclusively on fine-tuning XLM-RoBERTa, a multilingual Transformer-based model, under carefully controlled training and preprocessing settings. While the same model architecture was used for both subtasks, language-specific data handling strategies were applied based on empirical observations. The proposed system achieved first place in the Urdu subtask and third place in the Arabic subtask according to the official evaluation. These results demonstrate that multilingual pretrained models can serve as strong and reliable systems for AI-generated text detection across diverse languages.</abstract>
    <identifier type="citekey">zain-2026-lorad</identifier>
    <location>
      <url>https://aclanthology.org/2026.abjadnlp-1.57/</url>
    </location>
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>468</start>
        <end>471</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T LoRAD: Low-Resource AI-Generated Text Detection with XLM-RoBERTa
%A Zain, Ali
%S Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F zain-2026-lorad
%X This paper describes our system submitted to the AbjadGenEval Shared Task at ArabicNLP 2026, which focuses on binary classification of human-written versus machine-generated text in low-resource languages. We participated in two independent subtasks targeting Arabic and Urdu news and literary texts. Our approach relies exclusively on fine-tuning XLM-RoBERTa, a multilingual Transformer-based model, under carefully controlled training and preprocessing settings. While the same model architecture was used for both subtasks, language-specific data handling strategies were applied based on empirical observations. The proposed system achieved first place in the Urdu subtask and third place in the Arabic subtask according to the official evaluation. These results demonstrate that multilingual pretrained models can serve as strong and reliable systems for AI-generated text detection across diverse languages.
%U https://aclanthology.org/2026.abjadnlp-1.57/
%P 468-471

Markdown (Informal)
[LoRAD: Low-Resource AI-Generated Text Detection with XLM-RoBERTa](https://aclanthology.org/2026.abjadnlp-1.57/) (Zain, AbjadNLP 2026)

ACL
Ali Zain. 2026. LoRAD: Low-Resource AI-Generated Text Detection with XLM-RoBERTa. In Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script, pages 468–471, Rabat, Morocco. Association for Computational Linguistics.
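
The abstract's core recipe is fine-tuning XLM-RoBERTa with a binary classification head to separate human-written from machine-generated text. As a starting point, a minimal sketch of that setup with HuggingFace Transformers is shown below; the checkpoint name, hyperparameters, and column names are illustrative assumptions, not details taken from the paper.

```python
# Minimal sketch (not the paper's code): fine-tuning XLM-RoBERTa for
# binary human-vs-machine-generated text classification.
# Checkpoint, hyperparameters, and column names are assumptions.
from datasets import Dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

MODEL_NAME = "xlm-roberta-base"  # assumed checkpoint size

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)

# Toy stand-in for the shared-task data: label 0 = human, 1 = machine-generated.
train_data = Dataset.from_dict({
    "text": ["An example human-written sentence.", "An example generated sentence."],
    "label": [0, 1],
})

def tokenize(batch):
    # Truncate long news/literary passages to the model's context window.
    return tokenizer(batch["text"], truncation=True, max_length=512)

train_data = train_data.map(tokenize, batched=True)

args = TrainingArguments(
    output_dir="xlmr-ai-text-detector",
    num_train_epochs=3,               # assumed
    per_device_train_batch_size=16,   # assumed
    learning_rate=2e-5,               # assumed
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_data,
    tokenizer=tokenizer,  # enables dynamic padding via DataCollatorWithPadding
)
trainer.train()
```

At inference time, `trainer.predict` (or a `text-classification` pipeline over the fine-tuned checkpoint) would produce the binary human/machine labels this kind of task requires.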