% Conference paper entry. Case-sensitive words in title/booktitle are
% protected with whole-word braces so sentence-casing styles keep them.
@inproceedings{qaddoumi-etal-2026-syllable,
  title     = {Syllable Structures Across {Arabic} Varieties},
  author    = {Qaddoumi, Abdelrahim and
               Kodner, Jordan and
               Khalifa, Salam and
               Broselow, Ellen and
               Rambow, Owen},
  booktitle = {Proceedings of the 13th Workshop on {NLP} for Similar Languages, Varieties and Dialects},
  month     = mar,
  year      = 2026,
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.vardial-1.21/},
  pages     = {250--260},
  abstract  = {This study compares the syllable structures of nine Arabic varieties from Wiktionary, using a computational syllabifier. It further investigates methods for learning syllable boundaries in unsyllabified words transcribed in the International Phonetic Alphabet (IPA). The syllabification algorithm is evaluated under three conditions: (i) Default, employing fixed rules; (ii) Joint, learning onsets and codas across all varieties collectively; and (iii) Per-variety, learning onsets and codas specific to each variety. Results indicate that the default configuration yields the highest accuracy, ranging from 97.05{\%} to 100{\%}. The per-variety approach achieves 90.64{\%} to 100{\%} accuracy, while the joint approach ranges from 84.63{\%} to 94.74{\%}. A cross-variety analysis using Jensen-Shannon divergence reveals three principal groupings: Egyptian, Hejazi, and Modern Standard Arabic are closely related; Levantine and Gulf varieties constitute a second cluster; and Juba Arabic, Maltese, and Moroccan emerge as outliers. A cleaned dataset encompassing all nine varieties is also provided.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; the ID matches the citekey identifier below. -->
  <mods ID="qaddoumi-etal-2026-syllable">
    <titleInfo>
      <title>Syllable Structures Across Arabic Varieties</title>
    </titleInfo>

    <!-- Authors, listed in the order given by the record. -->
    <name type="personal">
      <namePart type="given">Abdelrahim</namePart>
      <namePart type="family">Qaddoumi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jordan</namePart>
      <namePart type="family">Kodner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Salam</namePart>
      <namePart type="family">Khalifa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ellen</namePart>
      <namePart type="family">Broselow</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Owen</namePart>
      <namePart type="family">Rambow</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume in which this paper appears. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>This study compares the syllable structures of nine Arabic varieties from Wiktionary, using a computational syllabifier. It further investigates methods for learning syllable boundaries in unsyllabified words transcribed in the International Phonetic Alphabet (IPA). The syllabification algorithm is evaluated under three conditions: (i) Default, employing fixed rules; (ii) Joint, learning onsets and codas across all varieties collectively; and (iii) Per-variety, learning onsets and codas specific to each variety. Results indicate that the default configuration yields the highest accuracy, ranging from 97.05% to 100%. The per-variety approach achieves 90.64% to 100% accuracy, while the joint approach ranges from 84.63% to 94.74%. A cross-variety analysis using Jensen-Shannon divergence reveals three principal groupings: Egyptian, Hejazi, and Modern Standard Arabic are closely related; Levantine and Gulf varieties constitute a second cluster; and Juba Arabic, Maltese, and Moroccan emerge as outliers. A cleaned dataset encompassing all nine varieties is also provided.</abstract>

    <identifier type="citekey">qaddoumi-etal-2026-syllable</identifier>
    <location>
      <url>https://aclanthology.org/2026.vardial-1.21/</url>
    </location>

    <!-- Position of the paper within the host volume (page range). -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>250</start>
        <end>260</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Syllable Structures Across Arabic Varieties
%A Qaddoumi, Abdelrahim
%A Kodner, Jordan
%A Khalifa, Salam
%A Broselow, Ellen
%A Rambow, Owen
%S Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F qaddoumi-etal-2026-syllable
%X This study compares the syllable structures of nine Arabic varieties from Wiktionary, using a computational syllabifier. It further investigates methods for learning syllable boundaries in unsyllabified words transcribed in the International Phonetic Alphabet (IPA). The syllabification algorithm is evaluated under three conditions: (i) Default, employing fixed rules; (ii) Joint, learning onsets and codas across all varieties collectively; and (iii) Per-variety, learning onsets and codas specific to each variety. Results indicate that the default configuration yields the highest accuracy, ranging from 97.05% to 100%. The per-variety approach achieves 90.64% to 100% accuracy, while the joint approach ranges from 84.63% to 94.74%. A cross-variety analysis using Jensen-Shannon divergence reveals three principal groupings: Egyptian, Hejazi, and Modern Standard Arabic are closely related; Levantine and Gulf varieties constitute a second cluster; and Juba Arabic, Maltese, and Moroccan emerge as outliers. A cleaned dataset encompassing all nine varieties is also provided.
%U https://aclanthology.org/2026.vardial-1.21/
%P 250-260
Markdown (Informal)
[Syllable Structures Across Arabic Varieties](https://aclanthology.org/2026.vardial-1.21/) (Qaddoumi et al., VarDial 2026)
ACL
- Abdelrahim Qaddoumi, Jordan Kodner, Salam Khalifa, Ellen Broselow, and Owen Rambow. 2026. Syllable Structures Across Arabic Varieties. In Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects, pages 250–260, Rabat, Morocco. Association for Computational Linguistics.