@inproceedings{alghamdi-etal-2026-improving,
title = "Improving on State-of-the-Art Models for Sentiment Analysis on Saudi-{English} Code-Switching Text",
author = "Alghamdi, Samaher and
Rayson, Paul and
Alotibi, Reem",
booktitle = "Proceedings of the 2nd Workshop on {NLP} for Languages Using {Arabic} Script",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.abjadnlp-1.30/",
pages = "218--228",
abstract = "Inserting English words, phrases, or sentences while writing or speaking in the Saudi Arabic dialect has become a widespread phenomenon in Saudi society. This phenomenon is linguistically called code-switching. It remains unclear how current sentiment analysis methods perform on Saudi-English code-switching text. In this paper, we address this gap by conducting the first sentiment analysis study on Saudi-English code-switching text. We present the first Saudi-English Sentiment Analysis Code Switching Dataset (SESA-CSD) and establish baseline results on this dataset. By evaluating multiple state-of-the-art small language models, we achieve improvements over the baseline of 3{\%} to 11{\%} in both accuracy and macro-F1. Among all small language models, XLM-RoBERTa achieved the highest performance, with an accuracy of 95.50{\%} and a macro-F1 of 95.53{\%}. Our findings indicate that multilingual and Arabic small language models, such as XLM-RoBERTa, GigaBERT, and SaudiBERT, consistently outperform bilingual Arabic-English large language models, such as Fanar and ALLaM, across zero-shot and multiple few-shot settings."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alghamdi-etal-2026-improving">
<titleInfo>
<title>Improving on State-of-the-Art Models for Sentiment Analysis on Saudi-English Code-Switching Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Samaher</namePart>
<namePart type="family">Alghamdi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Rayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reem</namePart>
<namePart type="family">Alotibi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Inserting English words, phrases, or sentences while writing or speaking in the Saudi Arabic dialect has become a widespread phenomenon in Saudi society. This phenomenon is linguistically called code-switching. It remains unclear how current sentiment analysis methods perform on Saudi-English code-switching text. In this paper, we address this gap by conducting the first sentiment analysis study on Saudi-English code-switching text. We present the first Saudi-English Sentiment Analysis Code Switching Dataset (SESA-CSD) and establish baseline results on this dataset. By evaluating multiple state-of-the-art small language models, we achieve improvements over the baseline of 3% to 11% in both accuracy and macro-F1. Among all small language models, XLM-RoBERTa achieved the highest performance, with an accuracy of 95.50% and a macro-F1 of 95.53%. Our findings indicate that multilingual and Arabic small language models, such as XLM-RoBERTa, GigaBERT, and SaudiBERT, consistently outperform bilingual Arabic-English large language models, such as Fanar and ALLaM, across zero-shot and multiple few-shot settings.</abstract>
<identifier type="citekey">alghamdi-etal-2026-improving</identifier>
<location>
<url>https://aclanthology.org/2026.abjadnlp-1.30/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>218</start>
<end>228</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving on State-of-the-Art Models for Sentiment Analysis on Saudi-English Code-Switching Text
%A Alghamdi, Samaher
%A Rayson, Paul
%A Alotibi, Reem
%S Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F alghamdi-etal-2026-improving
%X Inserting English words, phrases, or sentences while writing or speaking in the Saudi Arabic dialect has become a widespread phenomenon in Saudi society. This phenomenon is linguistically called code-switching. It remains unclear how current sentiment analysis methods perform on Saudi-English code-switching text. In this paper, we address this gap by conducting the first sentiment analysis study on Saudi-English code-switching text. We present the first Saudi-English Sentiment Analysis Code Switching Dataset (SESA-CSD) and establish baseline results on this dataset. By evaluating multiple state-of-the-art small language models, we achieve improvements over the baseline of 3% to 11% in both accuracy and macro-F1. Among all small language models, XLM-RoBERTa achieved the highest performance, with an accuracy of 95.50% and a macro-F1 of 95.53%. Our findings indicate that multilingual and Arabic small language models, such as XLM-RoBERTa, GigaBERT, and SaudiBERT, consistently outperform bilingual Arabic-English large language models, such as Fanar and ALLaM, across zero-shot and multiple few-shot settings.
%U https://aclanthology.org/2026.abjadnlp-1.30/
%P 218-228
Markdown (Informal)
[Improving on State-of-the-Art Models for Sentiment Analysis on Saudi-English Code-Switching Text](https://aclanthology.org/2026.abjadnlp-1.30/) (Alghamdi et al., AbjadNLP 2026)
ACL