@inproceedings{khamis-ahmed-2026-llm,
title = "{LLM}-to-Speech: A Synthetic Data Pipeline for Training Dialectal Text-to-Speech Models",
author = "Khamis, Ahmed and
Ahmed, Hesham Ali",
booktitle = "Proceedings of the 2nd Workshop on {NLP} for Languages Using {A}rabic Script",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.abjadnlp-1.6/",
pages = "47--54",
abstract = "Despite the advances in neural text to speech (TTS), many Arabic dialectal varieties remain marginally addressed, with most resources concentrated on Modern Spoken Arabic (MSA) and Gulf dialects, leaving Egyptian Arabic{---}the most widely understood Arabic dialect{---}severely under-resourced. We address this gap by introducing NileTTS: 38 hours of transcribed speech from two speakers across diverse domains including medical, sales, and general conversations. We construct this dataset using a novel synthetic pipeline: large language models (LLM) generate Egyptian Arabic content, which is then converted to natural speech using audio synthesis tools, followed by automatic transcription and speaker diarization with manual quality verification. We fine-tune XTTS v2, a state-of-the-art multilingual TTS model, on our dataset and evaluate against the baseline model trained on other Arabic dialects. Our contributions include: (1) the first publicly available Egyptian Arabic TTS dataset, (2) a reproducible synthetic data generation pipeline for dialectal TTS, and (3) an open-source fine-tuned model. All resources are released to advance Egyptian Arabic speech synthesis research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="khamis-ahmed-2026-llm">
<titleInfo>
<title>LLM-to-Speech: A Synthetic Data Pipeline for Training Dialectal Text-to-Speech Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Khamis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hesham</namePart>
<namePart type="given">Ali</namePart>
<namePart type="family">Ahmed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite the advances in neural text to speech (TTS), many Arabic dialectal varieties remain marginally addressed, with most resources concentrated on Modern Spoken Arabic (MSA) and Gulf dialects, leaving Egyptian Arabic—the most widely understood Arabic dialect—severely under-resourced. We address this gap by introducing NileTTS: 38 hours of transcribed speech from two speakers across diverse domains including medical, sales, and general conversations. We construct this dataset using a novel synthetic pipeline: large language models (LLM) generate Egyptian Arabic content, which is then converted to natural speech using audio synthesis tools, followed by automatic transcription and speaker diarization with manual quality verification. We fine-tune XTTS v2, a state-of-the-art multilingual TTS model, on our dataset and evaluate against the baseline model trained on other Arabic dialects. Our contributions include: (1) the first publicly available Egyptian Arabic TTS dataset, (2) a reproducible synthetic data generation pipeline for dialectal TTS, and (3) an open-source fine-tuned model. All resources are released to advance Egyptian Arabic speech synthesis research.</abstract>
<identifier type="citekey">khamis-ahmed-2026-llm</identifier>
<location>
<url>https://aclanthology.org/2026.abjadnlp-1.6/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>47</start>
<end>54</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LLM-to-Speech: A Synthetic Data Pipeline for Training Dialectal Text-to-Speech Models
%A Khamis, Ahmed
%A Ahmed, Hesham Ali
%S Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F khamis-ahmed-2026-llm
%X Despite the advances in neural text to speech (TTS), many Arabic dialectal varieties remain marginally addressed, with most resources concentrated on Modern Spoken Arabic (MSA) and Gulf dialects, leaving Egyptian Arabic—the most widely understood Arabic dialect—severely under-resourced. We address this gap by introducing NileTTS: 38 hours of transcribed speech from two speakers across diverse domains including medical, sales, and general conversations. We construct this dataset using a novel synthetic pipeline: large language models (LLM) generate Egyptian Arabic content, which is then converted to natural speech using audio synthesis tools, followed by automatic transcription and speaker diarization with manual quality verification. We fine-tune XTTS v2, a state-of-the-art multilingual TTS model, on our dataset and evaluate against the baseline model trained on other Arabic dialects. Our contributions include: (1) the first publicly available Egyptian Arabic TTS dataset, (2) a reproducible synthetic data generation pipeline for dialectal TTS, and (3) an open-source fine-tuned model. All resources are released to advance Egyptian Arabic speech synthesis research.
%U https://aclanthology.org/2026.abjadnlp-1.6/
%P 47-54
Markdown (Informal)
[LLM-to-Speech: A Synthetic Data Pipeline for Training Dialectal Text-to-Speech Models](https://aclanthology.org/2026.abjadnlp-1.6/) (Khamis & Ahmed, AbjadNLP 2026)
ACL