% Conference paper in the WACL-4 workshop proceedings (record URL: aclanthology.org).
% Case-sensitive words in title/booktitle are brace-protected as whole words.
% NOTE(review): `address` holds "Abu Dhabi, UAE", presumably the event venue rather
% than the publisher's city -- confirm before moving it to another field.
@inproceedings{hossain-etal-2025-enhancing,
  title     = {Enhancing Dialectal {Arabic} Intent Detection through Cross-Dialect Multilingual Input Augmentation},
  author    = {Hossain, Shehenaz and
               Shammary, Fouad and
               Shammary, Bahaulddin and
               Afli, Haithem},
  editor    = {Ezzini, Saad and
               Alami, Hamza and
               Berrada, Ismail and
               Benlahbib, Abdessamad and
               El Mahdaouy, Abdelkader and
               Lamsiyah, Salima and
               Derrouz, Hatim and
               Haddad Haddad, Amal and
               Jarrar, Mustafa and
               El-Haj, Mo and
               Mitkov, Ruslan and
               Rayson, Paul},
  booktitle = {Proceedings of the 4th Workshop on {Arabic} Corpus Linguistics ({WACL-4})},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.wacl-1.5/},
  pages     = {44--49},
  abstract  = {Addressing the challenges of Arabic intent detection amid extensive dialectal variation, this study presents a cross-dialectal, multilingual approach for classifying intents in banking and migration contexts. By augmenting dialectal inputs with Modern Standard Arabic (MSA) and English translations, our method leverages cross-lingual context to improve classification accuracy. We evaluate single-input (dialect-only), dual-input (dialect + MSA), and triple-input (dialect + MSA + English) models, applying language-specific tokenization for each. Results demonstrate that, in the migration dataset, our model achieved an accuracy gain of over 50{\%} on Tunisian dialect, increasing from 43.3{\%} with dialect-only input to 94{\%} with the full multilingual setup. Similarly, in the PAL (Palestinian dialect) dataset, accuracy improved from 87.7{\%} to 93.5{\%} with translation augmentation, reflecting a gain of 5.8 percentage points. These findings underscore the effectiveness of our approach for intent detection across various Arabic dialects.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hossain-etal-2025-enhancing">
<titleInfo>
<title>Enhancing Dialectal Arabic Intent Detection through Cross-Dialect Multilingual Input Augmentation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shehenaz</namePart>
<namePart type="family">Hossain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fouad</namePart>
<namePart type="family">Shammary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bahaulddin</namePart>
<namePart type="family">Shammary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haithem</namePart>
<namePart type="family">Afli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Arabic Corpus Linguistics (WACL-4)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saad</namePart>
<namePart type="family">Ezzini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hamza</namePart>
<namePart type="family">Alami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ismail</namePart>
<namePart type="family">Berrada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdessamad</namePart>
<namePart type="family">Benlahbib</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdelkader</namePart>
<namePart type="family">El Mahdaouy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salima</namePart>
<namePart type="family">Lamsiyah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hatim</namePart>
<namePart type="family">Derrouz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amal</namePart>
<namePart type="family">Haddad Haddad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mustafa</namePart>
<namePart type="family">Jarrar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mo</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Rayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Addressing the challenges of Arabic intent detection amid extensive dialectal variation, this study presents a cross-dialectal, multilingual approach for classifying intents in banking and migration contexts. By augmenting dialectal inputs with Modern Standard Arabic (MSA) and English translations, our method leverages cross-lingual context to improve classification accuracy. We evaluate single-input (dialect-only), dual-input (dialect + MSA), and triple-input (dialect + MSA + English) models, applying language-specific tokenization for each. Results demonstrate that, in the migration dataset, our model achieved an accuracy gain of over 50% on Tunisian dialect, increasing from 43.3% with dialect-only input to 94% with the full multilingual setup. Similarly, in the PAL (Palestinian dialect) dataset, accuracy improved from 87.7% to 93.5% with translation augmentation, reflecting a gain of 5.8 percentage points. These findings underscore the effectiveness of our approach for intent detection across various Arabic dialects.</abstract>
<identifier type="citekey">hossain-etal-2025-enhancing</identifier>
<location>
<url>https://aclanthology.org/2025.wacl-1.5/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>44</start>
<end>49</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Enhancing Dialectal Arabic Intent Detection through Cross-Dialect Multilingual Input Augmentation
%A Hossain, Shehenaz
%A Shammary, Fouad
%A Shammary, Bahaulddin
%A Afli, Haithem
%Y Ezzini, Saad
%Y Alami, Hamza
%Y Berrada, Ismail
%Y Benlahbib, Abdessamad
%Y El Mahdaouy, Abdelkader
%Y Lamsiyah, Salima
%Y Derrouz, Hatim
%Y Haddad Haddad, Amal
%Y Jarrar, Mustafa
%Y El-Haj, Mo
%Y Mitkov, Ruslan
%Y Rayson, Paul
%S Proceedings of the 4th Workshop on Arabic Corpus Linguistics (WACL-4)
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F hossain-etal-2025-enhancing
%X Addressing the challenges of Arabic intent detection amid extensive dialectal variation, this study presents a cross-dialectal, multilingual approach for classifying intents in banking and migration contexts. By augmenting dialectal inputs with Modern Standard Arabic (MSA) and English translations, our method leverages cross-lingual context to improve classification accuracy. We evaluate single-input (dialect-only), dual-input (dialect + MSA), and triple-input (dialect + MSA + English) models, applying language-specific tokenization for each. Results demonstrate that, in the migration dataset, our model achieved an accuracy gain of over 50% on Tunisian dialect, increasing from 43.3% with dialect-only input to 94% with the full multilingual setup. Similarly, in the PAL (Palestinian dialect) dataset, accuracy improved from 87.7% to 93.5% with translation augmentation, reflecting a gain of 5.8 percentage points. These findings underscore the effectiveness of our approach for intent detection across various Arabic dialects.
%U https://aclanthology.org/2025.wacl-1.5/
%P 44-49
Markdown (Informal)
[Enhancing Dialectal Arabic Intent Detection through Cross-Dialect Multilingual Input Augmentation](https://aclanthology.org/2025.wacl-1.5/) (Hossain et al., WACL 2025)
ACL