@inproceedings{dadure-etal-2025-sentiment,
title = "Sentiment Analysis of {A}rabic Tweets Using Large Language Models",
author = "Dadure, Pankaj and
Dixit, Ananya and
Tewatia, Kunal and
Paliwal, Nandini and
Malla, Anshika",
editor = "El-Haj, Mo",
booktitle = "Proceedings of the 1st Workshop on NLP for Languages Using Arabic Script",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.abjadnlp-1.10/",
pages = "88--94",
abstract = "In the digital era, sentiment analysis has become an indispensable tool for understanding public sentiments, optimizing market strategies, and enhancing customer engagement across diverse sectors. While significant advancements have been made in sentiment analysis for high-resource languages such as English, French, etc. This study focuses on Arabic, a low-resource language, to address its unique challenges like morphological complexity, diverse dialects, and limited linguistic resources. Existing works in Arabic sentiment analysis have utilized deep learning architectures like LSTM, BiLSTM, and CNN-LSTM, alongside embedding techniques such as Word2Vec and contextualized models like ARABERT. Building on this foundation, our research investigates sentiment classification of Arabic tweets, categorizing them as positive or negative, using embeddings derived from three large language models (LLMs): Universal Sentence Encoder (USE), XLM-RoBERTa base (XLM-R base), and MiniLM-L12-v2. Experimental results demonstrate that incorporating emojis in the dataset and using the MiniLM embeddings yield an accuracy of 85.98{\%}. In contrast, excluding emojis and using embeddings from the XLM-R base resulted in a lower accuracy of 78.98{\%}. These findings highlight the impact of both dataset composition and embedding techniques on Arabic sentiment analysis performance."
}
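The abstract describes an embed-then-classify pipeline: sentence embeddings are extracted from a pretrained model (USE, XLM-R base, or MiniLM-L12-v2) and a binary positive/negative classifier is trained on top of them. The sketch below illustrates that setup with the sentence-transformers and scikit-learn libraries; the specific checkpoint (paraphrase-multilingual-MiniLM-L12-v2), the logistic-regression classifier, and the toy tweets are assumptions for illustration, not the authors' reported configuration.

# Hedged sketch of the embed-then-classify pipeline the abstract describes.
# Assumed checkpoint and classifier; not the paper's exact configuration.
from sentence_transformers import SentenceTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Toy labelled Arabic tweets (1 = positive, 0 = negative); the paper's
# experiments compare keeping vs. removing emojis in the text.
tweets = [
    "أحببت هذا المنتج 😍",
    "تجربة رائعة وخدمة ممتازة",
    "خدمة سيئة جدا 😡",
    "لن أشتري منه مرة أخرى",
]
labels = [1, 1, 0, 0]

# Assumed multilingual MiniLM-L12-v2 sentence-embedding checkpoint.
encoder = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
embeddings = encoder.encode(tweets)

# Simple linear classifier over the fixed sentence embeddings.
X_train, X_test, y_train, y_test = train_test_split(
    embeddings, labels, test_size=0.5, stratify=labels, random_state=0
)
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)
print("accuracy:", accuracy_score(y_test, clf.predict(X_test)))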
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dadure-etal-2025-sentiment">
<titleInfo>
<title>Sentiment Analysis of Arabic Tweets Using Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pankaj</namePart>
<namePart type="family">Dadure</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ananya</namePart>
<namePart type="family">Dixit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kunal</namePart>
<namePart type="family">Tewatia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nandini</namePart>
<namePart type="family">Paliwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anshika</namePart>
<namePart type="family">Malla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on NLP for Languages Using Arabic Script</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mo</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the digital era, sentiment analysis has become an indispensable tool for understanding public sentiments, optimizing market strategies, and enhancing customer engagement across diverse sectors. While significant advancements have been made in sentiment analysis for high-resource languages such as English, French, etc. This study focuses on Arabic, a low-resource language, to address its unique challenges like morphological complexity, diverse dialects, and limited linguistic resources. Existing works in Arabic sentiment analysis have utilized deep learning architectures like LSTM, BiLSTM, and CNN-LSTM, alongside embedding techniques such as Word2Vec and contextualized models like ARABERT. Building on this foundation, our research investigates sentiment classification of Arabic tweets, categorizing them as positive or negative, using embeddings derived from three large language models (LLMs): Universal Sentence Encoder (USE), XLM-RoBERTa base (XLM-R base), and MiniLM-L12-v2. Experimental results demonstrate that incorporating emojis in the dataset and using the MiniLM embeddings yield an accuracy of 85.98%. In contrast, excluding emojis and using embeddings from the XLM-R base resulted in a lower accuracy of 78.98%. These findings highlight the impact of both dataset composition and embedding techniques on Arabic sentiment analysis performance.</abstract>
<identifier type="citekey">dadure-etal-2025-sentiment</identifier>
<location>
<url>https://aclanthology.org/2025.abjadnlp-1.10/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>88</start>
<end>94</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sentiment Analysis of Arabic Tweets Using Large Language Models
%A Dadure, Pankaj
%A Dixit, Ananya
%A Tewatia, Kunal
%A Paliwal, Nandini
%A Malla, Anshika
%Y El-Haj, Mo
%S Proceedings of the 1st Workshop on NLP for Languages Using Arabic Script
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F dadure-etal-2025-sentiment
%X In the digital era, sentiment analysis has become an indispensable tool for understanding public sentiments, optimizing market strategies, and enhancing customer engagement across diverse sectors. While significant advancements have been made in sentiment analysis for high-resource languages such as English and French, this study focuses on Arabic, a low-resource language, to address its unique challenges like morphological complexity, diverse dialects, and limited linguistic resources. Existing works in Arabic sentiment analysis have utilized deep learning architectures like LSTM, BiLSTM, and CNN-LSTM, alongside embedding techniques such as Word2Vec and contextualized models like AraBERT. Building on this foundation, our research investigates sentiment classification of Arabic tweets, categorizing them as positive or negative, using embeddings derived from three large language models (LLMs): Universal Sentence Encoder (USE), XLM-RoBERTa base (XLM-R base), and MiniLM-L12-v2. Experimental results demonstrate that incorporating emojis in the dataset and using the MiniLM embeddings yields an accuracy of 85.98%. In contrast, excluding emojis and using embeddings from the XLM-R base results in a lower accuracy of 78.98%. These findings highlight the impact of both dataset composition and embedding techniques on Arabic sentiment analysis performance.
%U https://aclanthology.org/2025.abjadnlp-1.10/
%P 88-94
Markdown (Informal)
[Sentiment Analysis of Arabic Tweets Using Large Language Models](https://aclanthology.org/2025.abjadnlp-1.10/) (Dadure et al., AbjadNLP 2025)