% Conference paper record for the iWAN-NLP system description.
% Whole words are brace-protected in the title so that styles which
% sentence-case titles keep the capitalisation of iWAN-NLP, AHaSIS and Arabic.
% The address and publisher values are kept verbatim so this entry agrees with
% the MODS and EndNote records for the same paper that follow in this file.
@inproceedings{al-khalifa-2025-iwan,
  title     = {{iWAN-NLP} at {AHaSIS} 2025: A Stacked Ensemble of {Arabic} Transformers for Sentiment Analysis on {Arabic} Dialects in the Hospitality Domain},
  author    = {Al-Khalifa, Hend},
  editor    = {Alharbi, Maram and
               Chafik, Salmane and
               Ezzini, Saad and
               Mitkov, Ruslan and
               Ranasinghe, Tharindu and
               Hettiarachchi, Hansi},
  booktitle = {Proceedings of the Shared Task on Sentiment Analysis for {Arabic} Dialects},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2025.ranlp-ahasis.2/},
  pages     = {7--13},
  abstract  = {This paper details the iWAN-NLP system developed for participation in the AHaSIS 2025 shared task, ``Sentiment Analysis on Arabic Dialects in the Hospitality Domain: A Multi-Dialect Benchmark.'' Our approach leverages a multi-model ensemble strategy, combining the strengths of MARBERTv2, Saudibert, and DarijaBERT. These pre-trained Arabic language models were fine-tuned for sentiment classification using a 5-fold stratified cross-validation methodology. The final predictions on the test set were derived by averaging the logits produced by each model across all folds and then averaging these combined logits across the three models. This system achieved a macro F1-score of 81.0{\%} on the official evaluation dataset and a cross-validated macro F1-score of 0.8513 (accuracy 0.8628) on the training set. Our findings highlight the effectiveness of ensembling regionally adapted models and robust cross-validation for Arabic sentiment analysis in the hospitality domain, ultimately securing first place in the AHaSIS 2025 shared task.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the conference paper; the host proceedings, its editors
       and its publisher are described inside the relatedItem element. -->
  <mods ID="al-khalifa-2025-iwan">
    <titleInfo>
      <title>iWAN-NLP at AHaSIS 2025: A Stacked Ensemble of Arabic Transformers for Sentiment Analysis on Arabic Dialects in the Hospitality Domain</title>
    </titleInfo>
    <!-- Paper author -->
    <name type="personal">
      <namePart type="given">Hend</namePart>
      <namePart type="family">Al-Khalifa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume and its six editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Shared Task on Sentiment Analysis for Arabic Dialects</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maram</namePart>
        <namePart type="family">Alharbi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Salmane</namePart>
        <namePart type="family">Chafik</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Saad</namePart>
        <namePart type="family">Ezzini</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tharindu</namePart>
        <namePart type="family">Ranasinghe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hansi</namePart>
        <namePart type="family">Hettiarachchi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper details the iWAN-NLP system developed for participation in the AHaSIS 2025 shared task, “Sentiment Analysis on Arabic Dialects in the Hospitality Domain: A Multi-Dialect Benchmark.” Our approach leverages a multi-model ensemble strategy, combining the strengths of MARBERTv2, Saudibert, and DarijaBERT. These pre-trained Arabic language models were fine-tuned for sentiment classification using a 5-fold stratified cross-validation methodology. The final predictions on the test set were derived by averaging the logits produced by each model across all folds and then averaging these combined logits across the three models. This system achieved a macro F1-score of 81.0% on the official evaluation dataset and a cross-validated macro F1-score of 0.8513 (accuracy 0.8628) on the training set. Our findings highlight the effectiveness of ensembling regionally adapted models and robust cross-validation for Arabic sentiment analysis in the hospitality domain, ultimately securing first place in the AHaSIS 2025 shared task.</abstract>
    <identifier type="citekey">al-khalifa-2025-iwan</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-ahasis.2/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>7</start>
        <end>13</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T iWAN-NLP at AHaSIS 2025: A Stacked Ensemble of Arabic Transformers for Sentiment Analysis on Arabic Dialects in the Hospitality Domain
%A Al-Khalifa, Hend
%Y Alharbi, Maram
%Y Chafik, Salmane
%Y Ezzini, Saad
%Y Mitkov, Ruslan
%Y Ranasinghe, Tharindu
%Y Hettiarachchi, Hansi
%S Proceedings of the Shared Task on Sentiment Analysis for Arabic Dialects
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F al-khalifa-2025-iwan
%X This paper details the iWAN-NLP system developed for participation in the AHaSIS 2025 shared task, “Sentiment Analysis on Arabic Dialects in the Hospitality Domain: A Multi-Dialect Benchmark.” Our approach leverages a multi-model ensemble strategy, combining the strengths of MARBERTv2, Saudibert, and DarijaBERT. These pre-trained Arabic language models were fine-tuned for sentiment classification using a 5-fold stratified cross-validation methodology. The final predictions on the test set were derived by averaging the logits produced by each model across all folds and then averaging these combined logits across the three models. This system achieved a macro F1-score of 81.0% on the official evaluation dataset and a cross-validated macro F1-score of 0.8513 (accuracy 0.8628) on the training set. Our findings highlight the effectiveness of ensembling regionally adapted models and robust cross-validation for Arabic sentiment analysis in the hospitality domain, ultimately securing first place in the AHaSIS 2025 shared task.
%U https://aclanthology.org/2025.ranlp-ahasis.2/
%P 7-13
Markdown (Informal)
[iWAN-NLP at AHaSIS 2025: A Stacked Ensemble of Arabic Transformers for Sentiment Analysis on Arabic Dialects in the Hospitality Domain](https://aclanthology.org/2025.ranlp-ahasis.2/) (Al-Khalifa, RANLP 2025)
ACL