@inproceedings{waheed-etal-2023-voxarabica,
title = "{V}ox{A}rabica: A Robust Dialect-Aware {A}rabic Speech Recognition System",
author = "Waheed, Abdul and
Talafha, Bashar and
Sullivan, Peter and
Elmadany, AbdelRahim and
Abdul-Mageed, Muhammad",
editor = "Sawaf, Hassan and
El-Beltagy, Samhaa and
Zaghouani, Wajdi and
Magdy, Walid and
Abdelali, Ahmed and
Tomeh, Nadi and
Abu Farha, Ibrahim and
Habash, Nizar and
Khalifa, Salam and
Keleg, Amr and
Haddad, Hatem and
Zitouni, Imed and
Mrini, Khalil and
Almatham, Rawan",
booktitle = "Proceedings of ArabicNLP 2023",
month = dec,
year = "2023",
address = "Singapore (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.arabicnlp-1.38/",
doi = "10.18653/v1/2023.arabicnlp-1.38",
pages = "441--449",
abstract = "Arabic is a complex language with many varieties and dialects spoken by {\textasciitilde} 450 millions all around the world. Due to the linguistic diversity and variations, it is challenging to build a robust and generalized ASR system for Arabic. In this work, we address this gap by developing and demoing a system, dubbed VoxArabica, for dialect identification (DID) as well as automatic speech recognition (ASR) of Arabic. We train a wide range of models such as HuBERT (DID), Whisper, and XLS-R (ASR) in a supervised setting for Arabic DID and ASR tasks. Our DID models are trained to identify 17 different dialects in addition to MSA. We finetune our ASR models on MSA, Egyptian, Moroccan, and mixed data. Additionally, for the remaining dialects in ASR, we provide the option to choose various models such as Whisper and MMS in a zero-shot setting. We integrate these models into a single web interface with diverse features such as audio recording, file upload, model selection, and the option to raise flags for incorrect outputs. Overall, we believe VoxArabica will be useful for a wide range of audiences concerned with Arabic research. Our system is currently running at https://cdce-206-12-100-168.ngrok.io/."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="waheed-etal-2023-voxarabica">
<titleInfo>
<title>VoxArabica: A Robust Dialect-Aware Arabic Speech Recognition System</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abdul</namePart>
<namePart type="family">Waheed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bashar</namePart>
<namePart type="family">Talafha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Sullivan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">AbdelRahim</namePart>
<namePart type="family">Elmadany</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="family">Abdul-Mageed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of ArabicNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hassan</namePart>
<namePart type="family">Sawaf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samhaa</namePart>
<namePart type="family">El-Beltagy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walid</namePart>
<namePart type="family">Magdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Abdelali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ibrahim</namePart>
<namePart type="family">Abu Farha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salam</namePart>
<namePart type="family">Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amr</namePart>
<namePart type="family">Keleg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hatem</namePart>
<namePart type="family">Haddad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalil</namePart>
<namePart type="family">Mrini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rawan</namePart>
<namePart type="family">Almatham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Arabic is a complex language with many varieties and dialects spoken by ~ 450 millions all around the world. Due to the linguistic diversity and variations, it is challenging to build a robust and generalized ASR system for Arabic. In this work, we address this gap by developing and demoing a system, dubbed VoxArabica, for dialect identification (DID) as well as automatic speech recognition (ASR) of Arabic. We train a wide range of models such as HuBERT (DID), Whisper, and XLS-R (ASR) in a supervised setting for Arabic DID and ASR tasks. Our DID models are trained to identify 17 different dialects in addition to MSA. We finetune our ASR models on MSA, Egyptian, Moroccan, and mixed data. Additionally, for the remaining dialects in ASR, we provide the option to choose various models such as Whisper and MMS in a zero-shot setting. We integrate these models into a single web interface with diverse features such as audio recording, file upload, model selection, and the option to raise flags for incorrect outputs. Overall, we believe VoxArabica will be useful for a wide range of audiences concerned with Arabic research. Our system is currently running at https://cdce-206-12-100-168.ngrok.io/.</abstract>
<identifier type="citekey">waheed-etal-2023-voxarabica</identifier>
<identifier type="doi">10.18653/v1/2023.arabicnlp-1.38</identifier>
<location>
<url>https://aclanthology.org/2023.arabicnlp-1.38/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>441</start>
<end>449</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T VoxArabica: A Robust Dialect-Aware Arabic Speech Recognition System
%A Waheed, Abdul
%A Talafha, Bashar
%A Sullivan, Peter
%A Elmadany, AbdelRahim
%A Abdul-Mageed, Muhammad
%Y Sawaf, Hassan
%Y El-Beltagy, Samhaa
%Y Zaghouani, Wajdi
%Y Magdy, Walid
%Y Abdelali, Ahmed
%Y Tomeh, Nadi
%Y Abu Farha, Ibrahim
%Y Habash, Nizar
%Y Khalifa, Salam
%Y Keleg, Amr
%Y Haddad, Hatem
%Y Zitouni, Imed
%Y Mrini, Khalil
%Y Almatham, Rawan
%S Proceedings of ArabicNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore (Hybrid)
%F waheed-etal-2023-voxarabica
%X Arabic is a complex language with many varieties and dialects spoken by ~ 450 millions all around the world. Due to the linguistic diversity and variations, it is challenging to build a robust and generalized ASR system for Arabic. In this work, we address this gap by developing and demoing a system, dubbed VoxArabica, for dialect identification (DID) as well as automatic speech recognition (ASR) of Arabic. We train a wide range of models such as HuBERT (DID), Whisper, and XLS-R (ASR) in a supervised setting for Arabic DID and ASR tasks. Our DID models are trained to identify 17 different dialects in addition to MSA. We finetune our ASR models on MSA, Egyptian, Moroccan, and mixed data. Additionally, for the remaining dialects in ASR, we provide the option to choose various models such as Whisper and MMS in a zero-shot setting. We integrate these models into a single web interface with diverse features such as audio recording, file upload, model selection, and the option to raise flags for incorrect outputs. Overall, we believe VoxArabica will be useful for a wide range of audiences concerned with Arabic research. Our system is currently running at https://cdce-206-12-100-168.ngrok.io/.
%R 10.18653/v1/2023.arabicnlp-1.38
%U https://aclanthology.org/2023.arabicnlp-1.38/
%U https://doi.org/10.18653/v1/2023.arabicnlp-1.38
%P 441-449
Markdown (Informal)
[VoxArabica: A Robust Dialect-Aware Arabic Speech Recognition System](https://aclanthology.org/2023.arabicnlp-1.38/) (Waheed et al., ArabicNLP 2023)
ACL