@inproceedings{alsuhaibani-alkaoud-2026-uslub,
  author    = {Alsuhaibani, Shahad and
               Alkaoud, Mohamed},
  title     = {Uslub at {A}bjad{A}uthor{ID} Shared Task: A Comparative Analysis of Traditional Machine Learning and Transformer-Based Models for Authorship Attribution in {A}rabic and {U}rdu},
  booktitle = {Proceedings of the 2nd Workshop on {NLP} for Languages Using {A}rabic Script},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.abjadnlp-1.65/},
  pages     = {515--519},
  abstract  = {Authorship attribution is a critical task in natural language processing with applications ranging from forensic linguistics to plagiarism detection. While well-studied in high-resource languages, it remains challenging for low-resource languages like Arabic and Urdu. In this paper, we present our participation in the AbjadNLP shared task, where we systematically evaluate three distinct approaches: traditional machine learning using SVM with TF-IDF features, fine-tuned transformer-based models (AraBERT), and LLMs. We demonstrate that while fine-tuned AraBERT excels in Arabic, traditional lexical models (SVM) prove more robust for Urdu, outperforming both BERT-based and LLM approaches. We also show that few-shot prompting with LLMs, when operated as a reranker over top candidates, significantly outperforms zero-shot baselines. Our final systems achieved competitive performance, ranking 6th and 1st in the Arabic and Urdu tasks respectively.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alsuhaibani-alkaoud-2026-uslub">
<titleInfo>
<title>Uslub at AbjadAuthorID Shared Task: A Comparative Analysis of Traditional Machine Learning and Transformer-Based Models for Authorship Attribution in Arabic and Urdu</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shahad</namePart>
<namePart type="family">Alsuhaibani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Alkaoud</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Authorship attribution is a critical task in natural language processing with applications ranging from forensic linguistics to plagiarism detection. While well-studied in high-resource languages, it remains challenging for low-resource languages like Arabic and Urdu. In this paper, we present our participation in the AbjadNLP shared task, where we systematically evaluate three distinct approaches: traditional machine learning using SVM with TF-IDF features, fine-tuned transformer-based models (AraBERT), and LLMs. We demonstrate that while fine-tuned AraBERT excels in Arabic, traditional lexical models (SVM) prove more robust for Urdu, outperforming both BERT-based and LLM approaches. We also show that few-shot prompting with LLMs, when operated as a reranker over top candidates, significantly outperforms zero-shot baselines. Our final systems achieved competitive performance, ranking 6th and 1st in the Arabic and Urdu tasks respectively.</abstract>
<identifier type="citekey">alsuhaibani-alkaoud-2026-uslub</identifier>
<location>
<url>https://aclanthology.org/2026.abjadnlp-1.65/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>515</start>
<end>519</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Uslub at AbjadAuthorID Shared Task: A Comparative Analysis of Traditional Machine Learning and Transformer-Based Models for Authorship Attribution in Arabic and Urdu
%A Alsuhaibani, Shahad
%A Alkaoud, Mohamed
%S Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F alsuhaibani-alkaoud-2026-uslub
%X Authorship attribution is a critical task in natural language processing with applications ranging from forensic linguistics to plagiarism detection. While well-studied in high-resource languages, it remains challenging for low-resource languages like Arabic and Urdu. In this paper, we present our participation in the AbjadNLP shared task, where we systematically evaluate three distinct approaches: traditional machine learning using SVM with TF-IDF features, fine-tuned transformer-based models (AraBERT), and LLMs. We demonstrate that while fine-tuned AraBERT excels in Arabic, traditional lexical models (SVM) prove more robust for Urdu, outperforming both BERT-based and LLM approaches. We also show that few-shot prompting with LLMs, when operated as a reranker over top candidates, significantly outperforms zero-shot baselines. Our final systems achieved competitive performance, ranking 6th and 1st in the Arabic and Urdu tasks respectively.
%U https://aclanthology.org/2026.abjadnlp-1.65/
%P 515-519
Markdown (Informal)
[Uslub at AbjadAuthorID Shared Task: A Comparative Analysis of Traditional Machine Learning and Transformer-Based Models for Authorship Attribution in Arabic and Urdu](https://aclanthology.org/2026.abjadnlp-1.65/) (Alsuhaibani & Alkaoud, AbjadNLP 2026)
ACL