@inproceedings{alahmari-2025-sadslyc,
title = "{SADSL}y{C}: A Corpus for Saudi {A}rabian Multi-dialect Identification through Song Lyrics",
author = "Alahmari, Salwa Saad",
editor = "Ezzini, Saad and
Alami, Hamza and
Berrada, Ismail and
Benlahbib, Abdessamad and
El Mahdaouy, Abdelkader and
Lamsiyah, Salima and
Derrouz, Hatim and
Haddad Haddad, Amal and
Jarrar, Mustafa and
El-Haj, Mo and
Mitkov, Ruslan and
Rayson, Paul",
booktitle = "Proceedings of the 4th Workshop on Arabic Corpus Linguistics (WACL-4)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.wacl-1.4/",
pages = "38--43",
abstract = "This paper presents the Saudi Arabian Dialects Song Lyrics Corpus (SADSLyC), the first dataset featuring song lyrics from the five major Saudi dialects: Najdi (Central Region), Hijazi (Western Region), Shamali (Northern Region), Janoubi (Southern Region), and Shargawi (Eastern Region). The dataset consists of 31,358 sentences, with each sentence representing a self-contained verse in a song, totaling 151,841 words. Additionally, we present a baseline experiment using the SaudiBERT model to classify the fine-grained dialects in the SADSLyC Corpus. The model achieved an overall accuracy of 73{\%} on the test dataset."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alahmari-2025-sadslyc">
<titleInfo>
<title>SADSLyC: A Corpus for Saudi Arabian Multi-dialect Identification through Song Lyrics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Salwa</namePart>
<namePart type="given">Saad</namePart>
<namePart type="family">Alahmari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Arabic Corpus Linguistics (WACL-4)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saad</namePart>
<namePart type="family">Ezzini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hamza</namePart>
<namePart type="family">Alami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ismail</namePart>
<namePart type="family">Berrada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdessamad</namePart>
<namePart type="family">Benlahbib</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdelkader</namePart>
<namePart type="family">El Mahdaouy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salima</namePart>
<namePart type="family">Lamsiyah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hatim</namePart>
<namePart type="family">Derrouz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amal</namePart>
<namePart type="family">Haddad Haddad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mustafa</namePart>
<namePart type="family">Jarrar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mo</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Rayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the Saudi Arabian Dialects Song Lyrics Corpus (SADSLyC), the first dataset featuring song lyrics from the five major Saudi dialects: Najdi (Central Region), Hijazi (Western Region), Shamali (Northern Region), Janoubi (Southern Region), and Shargawi (Eastern Region). The dataset consists of 31,358 sentences, with each sentence representing a self-contained verse in a song, totaling 151,841 words. Additionally, we present a baseline experiment using the SaudiBERT model to classify the fine-grained dialects in the SADSLyC Corpus. The model achieved an overall accuracy of 73% on the test dataset.</abstract>
<identifier type="citekey">alahmari-2025-sadslyc</identifier>
<location>
<url>https://aclanthology.org/2025.wacl-1.4/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>38</start>
<end>43</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SADSLyC: A Corpus for Saudi Arabian Multi-dialect Identification through Song Lyrics
%A Alahmari, Salwa Saad
%Y Ezzini, Saad
%Y Alami, Hamza
%Y Berrada, Ismail
%Y Benlahbib, Abdessamad
%Y El Mahdaouy, Abdelkader
%Y Lamsiyah, Salima
%Y Derrouz, Hatim
%Y Haddad Haddad, Amal
%Y Jarrar, Mustafa
%Y El-Haj, Mo
%Y Mitkov, Ruslan
%Y Rayson, Paul
%S Proceedings of the 4th Workshop on Arabic Corpus Linguistics (WACL-4)
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F alahmari-2025-sadslyc
%X This paper presents the Saudi Arabian Dialects Song Lyrics Corpus (SADSLyC), the first dataset featuring song lyrics from the five major Saudi dialects: Najdi (Central Region), Hijazi (Western Region), Shamali (Northern Region), Janoubi (Southern Region), and Shargawi (Eastern Region). The dataset consists of 31,358 sentences, with each sentence representing a self-contained verse in a song, totaling 151,841 words. Additionally, we present a baseline experiment using the SaudiBERT model to classify the fine-grained dialects in the SADSLyC Corpus. The model achieved an overall accuracy of 73% on the test dataset.
%U https://aclanthology.org/2025.wacl-1.4/
%P 38-43
Markdown (Informal)
[SADSLyC: A Corpus for Saudi Arabian Multi-dialect Identification through Song Lyrics](https://aclanthology.org/2025.wacl-1.4/) (Alahmari, WACL 2025)
ACL