% Shared-task paper entry. Proper nouns in title/booktitle are brace-protected
% as whole words so sentence-casing styles keep their capitalisation.
% NOTE(review): `address` presumably holds the conference venue while the
% publisher's city is embedded in `publisher` -- confirm against the target
% style before normalising either field.
@inproceedings{biswas-zaghouani-2025-enhancing,
  title     = {Enhancing {Arabic} Dialectal Sentiment Analysis through Advanced Data Augmentation Techniques},
  author    = {Biswas, Md. Rafiul and
               Zaghouani, Wajdi},
  editor    = {Alharbi, Maram and
               Chafik, Salmane and
               Ezzini, Saad and
               Mitkov, Ruslan and
               Ranasinghe, Tharindu and
               Hettiarachchi, Hansi},
  booktitle = {Proceedings of the Shared Task on Sentiment Analysis for {Arabic} Dialects},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2025.ranlp-ahasis.4/},
  pages     = {24--28},
  abstract  = {This work addresses the challenge of Arabic sentiment analysis in the hospitality domain in all dialects by using data augmentation techniques. We created a pipeline with three simple techniques: context-based paraphrasing, pattern-based sentence generation, and domain-specific word replacement. Our method preserves the original dialect features, meanings, and key classification details while adding diversity to the training data. It also includes automatic fallback between methods to handle challenges effectively. We used the Fanar API for dialectal data augmentation in the hospitality domain. The AraBERT-Large-v02 model was fine-tuned on original and augmented data, showing improved performance. This study helps solve the problem of limited dialect data in Arabic NLP and offers an effective framework that is useful for other Arabic text analysis tasks.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="biswas-zaghouani-2025-enhancing">
    <titleInfo>
      <title>Enhancing Arabic Dialectal Sentiment Analysis through Advanced Data Augmentation Techniques</title>
    </titleInfo>
    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Md.</namePart>
      <namePart type="given">Rafiul</namePart>
      <namePart type="family">Biswas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wajdi</namePart>
      <namePart type="family">Zaghouani</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Shared Task on Sentiment Analysis for Arabic Dialects</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maram</namePart>
        <namePart type="family">Alharbi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Salmane</namePart>
        <namePart type="family">Chafik</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Saad</namePart>
        <namePart type="family">Ezzini</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tharindu</namePart>
        <namePart type="family">Ranasinghe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hansi</namePart>
        <namePart type="family">Hettiarachchi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This work addresses the challenge of Arabic sentiment analysis in the hospitality domain in all dialects by using data augmentation techniques. We created a pipeline with three simple techniques: context-based paraphrasing, pattern-based sentence generation, and domain-specific word replacement. Our method preserves the original dialect features, meanings, and key classification details while adding diversity to the training data. It also includes automatic fallback between methods to handle challenges effectively. We used the Fanar API for dialectal data augmentation in the hospitality domain. The AraBERT-Large-v02 model was fine-tuned on original and augmented data, showing improved performance. This study helps solve the problem of limited dialect data in Arabic NLP and offers an effective framework that is useful for other Arabic text analysis tasks.</abstract>
    <identifier type="citekey">biswas-zaghouani-2025-enhancing</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-ahasis.4/</url>
    </location>
    <!-- Position of the paper within the host volume: issue date and page range. -->
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>24</start>
        <end>28</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Enhancing Arabic Dialectal Sentiment Analysis through Advanced Data Augmentation Techniques
%A Biswas, Md. Rafiul
%A Zaghouani, Wajdi
%Y Alharbi, Maram
%Y Chafik, Salmane
%Y Ezzini, Saad
%Y Mitkov, Ruslan
%Y Ranasinghe, Tharindu
%Y Hettiarachchi, Hansi
%S Proceedings of the Shared Task on Sentiment Analysis for Arabic Dialects
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F biswas-zaghouani-2025-enhancing
%X This work addresses the challenge of Arabic sentiment analysis in the hospitality domain in all dialects by using data augmentation techniques. We created a pipeline with three simple techniques: context-based paraphrasing, pattern-based sentence generation, and domain-specific word replacement. Our method preserves the original dialect features, meanings, and key classification details while adding diversity to the training data. It also includes automatic fallback between methods to handle challenges effectively. We used the Fanar API for dialectal data augmentation in the hospitality domain. The AraBERT-Large-v02 model was fine-tuned on original and augmented data, showing improved performance. This study helps solve the problem of limited dialect data in Arabic NLP and offers an effective framework that is useful for other Arabic text analysis tasks.
%U https://aclanthology.org/2025.ranlp-ahasis.4/
%P 24-28
Markdown (Informal)
[Enhancing Arabic Dialectal Sentiment Analysis through Advanced Data Augmentation Techniques](https://aclanthology.org/2025.ranlp-ahasis.4/) (Biswas & Zaghouani, RANLP 2025)
ACL