% Workshop paper from the WANLP 2022 proceedings (ACL Anthology ID 2022.wanlp-1.33).
% Proper nouns and acronyms are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitalization.
@inproceedings{alabbasi-etal-2022-gulf,
    title     = {{Gulf} {Arabic} Diacritization: Guidelines, Initial Dataset, and Results},
    author    = {Alabbasi, Nouf and
                 Al-Badrashiny, Mohamed and
                 Aldahmani, Maryam and
                 AlDhanhani, Ahmed and
                 Alhashmi, Abdullah Saleh and
                 Alhashmi, Fawaghy Ahmed and
                 Al Hashemi, Khalid and
                 Alkhobbi, Rama Emad and
                 Al Maazmi, Shamma T and
                 Alyafeai, Mohammed Ali and
                 Alzaabi, Mariam M and
                 Alzaabi, Mohamed Saqer and
                 Badri, Fatma Khalid and
                 Darwish, Kareem and
                 Diab, Ehab Mansour and
                 Elmallah, Muhammad Morsy and
                 Elnashar, Amira Ayman and
                 Elneima, Ashraf Hatim and
                 Kabbani, MHD Tameem and
                 Rabih, Nour and
                 Saad, Ahmad and
                 Sousou, Ammar Mamoun},
    booktitle = {Proceedings of the Seventh {Arabic} Natural Language Processing Workshop ({WANLP})},
    month     = dec,
    year      = {2022},
    address   = {Abu Dhabi, United Arab Emirates (Hybrid)},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2022.wanlp-1.33},
    doi       = {10.18653/v1/2022.wanlp-1.33},
    pages     = {356--360},
    abstract  = {Arabic diacritic recovery is important for a variety of downstream tasks such as text-to-speech. In this paper, we introduce a new Gulf Arabic diacritization dataset composed of 19,850 words based on a subset of the Gumar corpus. We provide comprehensive set of guidelines for diacritization to enable the diacritization of more data. We also report on diacritization results based on the new corpus using a Hidden Markov Model and character-based sequence to sequence models.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alabbasi-etal-2022-gulf">
<titleInfo>
<title>Gulf Arabic Diacritization: Guidelines, Initial Dataset, and Results</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nouf</namePart>
<namePart type="family">Alabbasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Al-Badrashiny</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maryam</namePart>
<namePart type="family">Aldahmani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">AlDhanhani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdullah</namePart>
<namePart type="given">Saleh</namePart>
<namePart type="family">Alhashmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fawaghy</namePart>
<namePart type="given">Ahmed</namePart>
<namePart type="family">Alhashmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Al Hashemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rama</namePart>
<namePart type="given">Emad</namePart>
<namePart type="family">Alkhobbi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shamma</namePart>
<namePart type="given">T</namePart>
<namePart type="family">Al Maazmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="given">Ali</namePart>
<namePart type="family">Alyafeai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mariam</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Alzaabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="given">Saqer</namePart>
<namePart type="family">Alzaabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fatma</namePart>
<namePart type="given">Khalid</namePart>
<namePart type="family">Badri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehab</namePart>
<namePart type="given">Mansour</namePart>
<namePart type="family">Diab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="given">Morsy</namePart>
<namePart type="family">Elmallah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amira</namePart>
<namePart type="given">Ayman</namePart>
<namePart type="family">Elnashar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashraf</namePart>
<namePart type="given">Hatim</namePart>
<namePart type="family">Elneima</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">MHD</namePart>
<namePart type="given">Tameem</namePart>
<namePart type="family">Kabbani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nour</namePart>
<namePart type="family">Rabih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmad</namePart>
<namePart type="family">Saad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ammar</namePart>
<namePart type="given">Mamoun</namePart>
<namePart type="family">Sousou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Arabic diacritic recovery is important for a variety of downstream tasks such as text-to-speech. In this paper, we introduce a new Gulf Arabic diacritization dataset composed of 19,850 words based on a subset of the Gumar corpus. We provide comprehensive set of guidelines for diacritization to enable the diacritization of more data. We also report on diacritization results based on the new corpus using a Hidden Markov Model and character-based sequence to sequence models.</abstract>
<identifier type="citekey">alabbasi-etal-2022-gulf</identifier>
<identifier type="doi">10.18653/v1/2022.wanlp-1.33</identifier>
<location>
<url>https://aclanthology.org/2022.wanlp-1.33</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>356</start>
<end>360</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Gulf Arabic Diacritization: Guidelines, Initial Dataset, and Results
%A Alabbasi, Nouf
%A Al-Badrashiny, Mohamed
%A Aldahmani, Maryam
%A AlDhanhani, Ahmed
%A Alhashmi, Abdullah Saleh
%A Alhashmi, Fawaghy Ahmed
%A Al Hashemi, Khalid
%A Alkhobbi, Rama Emad
%A Al Maazmi, Shamma T.
%A Alyafeai, Mohammed Ali
%A Alzaabi, Mariam M.
%A Alzaabi, Mohamed Saqer
%A Badri, Fatma Khalid
%A Darwish, Kareem
%A Diab, Ehab Mansour
%A Elmallah, Muhammad Morsy
%A Elnashar, Amira Ayman
%A Elneima, Ashraf Hatim
%A Kabbani, MHD Tameem
%A Rabih, Nour
%A Saad, Ahmad
%A Sousou, Ammar Mamoun
%S Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F alabbasi-etal-2022-gulf
%X Arabic diacritic recovery is important for a variety of downstream tasks such as text-to-speech. In this paper, we introduce a new Gulf Arabic diacritization dataset composed of 19,850 words based on a subset of the Gumar corpus. We provide comprehensive set of guidelines for diacritization to enable the diacritization of more data. We also report on diacritization results based on the new corpus using a Hidden Markov Model and character-based sequence to sequence models.
%R 10.18653/v1/2022.wanlp-1.33
%U https://aclanthology.org/2022.wanlp-1.33
%U https://doi.org/10.18653/v1/2022.wanlp-1.33
%P 356-360
Markdown (Informal)
[Gulf Arabic Diacritization: Guidelines, Initial Dataset, and Results](https://aclanthology.org/2022.wanlp-1.33) (Alabbasi et al., WANLP 2022)
ACL
- Nouf Alabbasi, Mohamed Al-Badrashiny, Maryam Aldahmani, Ahmed AlDhanhani, Abdullah Saleh Alhashmi, Fawaghy Ahmed Alhashmi, Khalid Al Hashemi, Rama Emad Alkhobbi, Shamma T Al Maazmi, Mohammed Ali Alyafeai, Mariam M Alzaabi, Mohamed Saqer Alzaabi, Fatma Khalid Badri, Kareem Darwish, Ehab Mansour Diab, Muhammad Morsy Elmallah, Amira Ayman Elnashar, Ashraf Hatim Elneima, MHD Tameem Kabbani, et al. 2022. Gulf Arabic Diacritization: Guidelines, Initial Dataset, and Results. In Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP), pages 356–360, Abu Dhabi, United Arab Emirates (Hybrid). Association for Computational Linguistics.