% BibTeX record for a conference paper; the same citation is repeated after
% this entry in MODS XML, EndNote and plain-text form.
% NOTE(review): address appears to hold the conference location while the
% publisher's own city is embedded in publisher; the MODS and EndNote records
% use the same split, so confirm before normalising either field.
@inproceedings{alharbi-etal-2025-evaluating-large,
  title     = {Evaluating Large Language Models on Sentiment Analysis in {Arabic} Dialects},
  author    = {Alharbi, Maram I. and
               Ezzini, Saad and
               Hettiarachchi, Hansi and
               Ranasinghe, Tharindu and
               Mitkov, Ruslan},
  editor    = {Angelova, Galia and
               Kunilovskaya, Maria and
               Escribe, Marie and
               Mitkov, Ruslan},
  booktitle = {Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative {AI} Era},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2025.ranlp-1.8/},
  pages     = {67--74},
  abstract  = {Despite recent progress in large language models (LLMs), their performance on Arabic dialects remains underexplored, particularly in the context of sentiment analysis. This study presents a comparative evaluation of three LLMs, DeepSeek-R1, Qwen2.5, and LLaMA-3, on sentiment classification across Modern Standard Arabic (MSA), Saudi dialect and Darija. We construct a balanced sentiment dataset by translating and validating MSA hotel reviews into Saudi dialect and Darija. Using parameter-efficient fine-tuning (LoRA) and dialect-specific prompts, we assess each model under matched and mismatched prompting conditions. Evaluation results show that Qwen2.5 achieves the highest macro F1 score of 79{\%} on Darija input using MSA prompts, while DeepSeek performs best when prompted in the input dialect, reaching 71{\%} on Saudi dialect. LLaMA-3 exhibits stable performance across prompt variations, with 75{\%} macro F1 on Darija input under MSA prompting. Dialect-aware prompting consistently improves classification accuracy, particularly for neutral and negative sentiment classes.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the conference paper identified by the citekey below. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="alharbi-etal-2025-evaluating-large">
    <!-- Paper title -->
    <titleInfo>
      <title>Evaluating Large Language Models on Sentiment Analysis in Arabic Dialects</title>
    </titleInfo>
    <!-- Authors, in citation order -->
    <name type="personal">
      <namePart type="given">Maram</namePart>
      <namePart type="given">I</namePart>
      <namePart type="family">Alharbi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Saad</namePart>
      <namePart type="family">Ezzini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hansi</namePart>
      <namePart type="family">Hettiarachchi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tharindu</namePart>
      <namePart type="family">Ranasinghe</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ruslan</namePart>
      <namePart type="family">Mitkov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume, its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Kunilovskaya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marie</namePart>
        <namePart type="family">Escribe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Despite recent progress in large language models (LLMs), their performance on Arabic dialects remains underexplored, particularly in the context of sentiment analysis. This study presents a comparative evaluation of three LLMs, DeepSeek-R1, Qwen2.5, and LLaMA-3, on sentiment classification across Modern Standard Arabic (MSA), Saudi dialect and Darija. We construct a balanced sentiment dataset by translating and validating MSA hotel reviews into Saudi dialect and Darija. Using parameter-efficient fine-tuning (LoRA) and dialect-specific prompts, we assess each model under matched and mismatched prompting conditions. Evaluation results show that Qwen2.5 achieves the highest macro F1 score of 79% on Darija input using MSA prompts, while DeepSeek performs best when prompted in the input dialect, reaching 71% on Saudi dialect. LLaMA-3 exhibits stable performance across prompt variations, with 75% macro F1 on Darija input under MSA prompting. Dialect-aware prompting consistently improves classification accuracy, particularly for neutral and negative sentiment classes.</abstract>
    <identifier type="citekey">alharbi-etal-2025-evaluating-large</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-1.8/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>67</start>
        <end>74</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Large Language Models on Sentiment Analysis in Arabic Dialects
%A Alharbi, Maram I.
%A Ezzini, Saad
%A Hettiarachchi, Hansi
%A Ranasinghe, Tharindu
%A Mitkov, Ruslan
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F alharbi-etal-2025-evaluating-large
%X Despite recent progress in large language models (LLMs), their performance on Arabic dialects remains underexplored, particularly in the context of sentiment analysis. This study presents a comparative evaluation of three LLMs, DeepSeek-R1, Qwen2.5, and LLaMA-3, on sentiment classification across Modern Standard Arabic (MSA), Saudi dialect and Darija. We construct a balanced sentiment dataset by translating and validating MSA hotel reviews into Saudi dialect and Darija. Using parameter-efficient fine-tuning (LoRA) and dialect-specific prompts, we assess each model under matched and mismatched prompting conditions. Evaluation results show that Qwen2.5 achieves the highest macro F1 score of 79% on Darija input using MSA prompts, while DeepSeek performs best when prompted in the input dialect, reaching 71% on Saudi dialect. LLaMA-3 exhibits stable performance across prompt variations, with 75% macro F1 on Darija input under MSA prompting. Dialect-aware prompting consistently improves classification accuracy, particularly for neutral and negative sentiment classes.
%U https://aclanthology.org/2025.ranlp-1.8/
%P 67-74
Markdown (Informal)
[Evaluating Large Language Models on Sentiment Analysis in Arabic Dialects](https://aclanthology.org/2025.ranlp-1.8/) (Alharbi et al., RANLP 2025)
ACL
- Maram I. Alharbi, Saad Ezzini, Hansi Hettiarachchi, Tharindu Ranasinghe, and Ruslan Mitkov. 2025. Evaluating Large Language Models on Sentiment Analysis in Arabic Dialects. In Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era, pages 67–74, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.