@inproceedings{mubarak-etal-2019-system,
title = "A System for Diacritizing Four Varieties of {A}rabic",
author = "Mubarak, Hamdy and
Abdelali, Ahmed and
Darwish, Kareem and
Eldesouki, Mohamed and
Samih, Younes and
Sajjad, Hassan",
editor = "Pad{\'o}, Sebastian and
Huang, Ruihong",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-3037/",
doi = "10.18653/v1/D19-3037",
pages = "217--222",
abstract = "Short vowels, aka diacritics, are more often omitted when writing different varieties of Arabic including Modern Standard Arabic (MSA), Classical Arabic (CA), and Dialectal Arabic (DA). However, diacritics are required to properly pronounce words, which makes diacritic restoration (a.k.a. diacritization) essential for language learning and text-to-speech applications. In this paper, we present a system for diacritizing MSA, CA, and two varieties of DA, namely Moroccan and Tunisian. The system uses a character level sequence-to-sequence deep learning model that requires no feature engineering and beats all previous SOTA systems for all the Arabic varieties that we test on."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mubarak-etal-2019-system">
<titleInfo>
<title>A System for Diacritizing Four Varieties of Arabic</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hamdy</namePart>
<namePart type="family">Mubarak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Abdelali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Eldesouki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Younes</namePart>
<namePart type="family">Samih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hassan</namePart>
<namePart type="family">Sajjad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Padó</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruihong</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Short vowels, aka diacritics, are more often omitted when writing different varieties of Arabic including Modern Standard Arabic (MSA), Classical Arabic (CA), and Dialectal Arabic (DA). However, diacritics are required to properly pronounce words, which makes diacritic restoration (a.k.a. diacritization) essential for language learning and text-to-speech applications. In this paper, we present a system for diacritizing MSA, CA, and two varieties of DA, namely Moroccan and Tunisian. The system uses a character level sequence-to-sequence deep learning model that requires no feature engineering and beats all previous SOTA systems for all the Arabic varieties that we test on.</abstract>
<identifier type="citekey">mubarak-etal-2019-system</identifier>
<identifier type="doi">10.18653/v1/D19-3037</identifier>
<location>
<url>https://aclanthology.org/D19-3037/</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>217</start>
<end>222</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A System for Diacritizing Four Varieties of Arabic
%A Mubarak, Hamdy
%A Abdelali, Ahmed
%A Darwish, Kareem
%A Eldesouki, Mohamed
%A Samih, Younes
%A Sajjad, Hassan
%Y Padó, Sebastian
%Y Huang, Ruihong
%S Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F mubarak-etal-2019-system
%X Short vowels, aka diacritics, are more often omitted when writing different varieties of Arabic including Modern Standard Arabic (MSA), Classical Arabic (CA), and Dialectal Arabic (DA). However, diacritics are required to properly pronounce words, which makes diacritic restoration (a.k.a. diacritization) essential for language learning and text-to-speech applications. In this paper, we present a system for diacritizing MSA, CA, and two varieties of DA, namely Moroccan and Tunisian. The system uses a character level sequence-to-sequence deep learning model that requires no feature engineering and beats all previous SOTA systems for all the Arabic varieties that we test on.
%R 10.18653/v1/D19-3037
%U https://aclanthology.org/D19-3037/
%U https://doi.org/10.18653/v1/D19-3037
%P 217-222
Markdown (Informal)
[A System for Diacritizing Four Varieties of Arabic](https://aclanthology.org/D19-3037/) (Mubarak et al., EMNLP-IJCNLP 2019)
ACL
- Hamdy Mubarak, Ahmed Abdelali, Kareem Darwish, Mohamed Eldesouki, Younes Samih, and Hassan Sajjad. 2019. A System for Diacritizing Four Varieties of Arabic. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations, pages 217–222, Hong Kong, China. Association for Computational Linguistics.