% Workshop paper W17-1311; words in the titles that must keep their capitals are brace-protected as whole words.
@inproceedings{fashwan-alansary-2017-shakkil,
  title     = {{SHAKKIL}: An Automatic Diacritization System for {Modern Standard Arabic} Texts},
  author    = {Fashwan, Amany and
               Alansary, Sameh},
  editor    = {Habash, Nizar and
               Diab, Mona and
               Darwish, Kareem and
               El-Hajj, Wassim and
               Al-Khalifa, Hend and
               Bouamor, Houda and
               Tomeh, Nadi and
               El-Haj, Mahmoud and
               Zaghouani, Wajdi},
  booktitle = {Proceedings of the Third {Arabic} Natural Language Processing Workshop},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W17-1311},
  doi       = {10.18653/v1/W17-1311},
  pages     = {84--93},
  abstract  = {This paper sheds light on a system that would be able to diacritize Arabic texts automatically (SHAKKIL). In this system, the diacritization problem will be handled through two levels; morphological and syntactic processing levels. The adopted morphological disambiguation algorithm depends on four layers; Uni-morphological form layer, rule-based morphological disambiguation layer, statistical-based disambiguation layer and Out Of Vocabulary (OOV) layer. The adopted syntactic disambiguation algorithms is concerned with detecting the case ending diacritics depending on a rule based approach simulating the shallow parsing technique. This will be achieved using an annotated corpus for extracting the Arabic linguistic rules, building the language models and testing the system output. This system is considered as a good trial of the interaction between rule-based approach and statistical approach, where the rules can help the statistics in detecting the right diacritization and vice versa. At this point, the morphological Word Error Rate (WER) is 4.56{\%} while the morphological Diacritic Error Rate (DER) is 1.88{\%} and the syntactic WER is 9.36{\%}. The best WER is 14.78{\%} compared to the best-published results, of (Abandah, 2015); 11.68{\%}, (Rashwan, et al., 2015); 12.90{\%} and (Metwally, Rashwan, {\&} Atiya, 2016); 13.70{\%}.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper at https://aclanthology.org/W17-1311; the host relatedItem describes the proceedings volume it appears in. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="fashwan-alansary-2017-shakkil">
    <titleInfo>
      <title>SHAKKIL: An Automatic Diacritization System for Modern Standard Arabic Texts</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Amany</namePart>
      <namePart type="family">Fashwan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sameh</namePart>
      <namePart type="family">Alansary</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2017-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Third Arabic Natural Language Processing Workshop</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nizar</namePart>
        <namePart type="family">Habash</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mona</namePart>
        <namePart type="family">Diab</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kareem</namePart>
        <namePart type="family">Darwish</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wassim</namePart>
        <namePart type="family">El-Hajj</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hend</namePart>
        <namePart type="family">Al-Khalifa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nadi</namePart>
        <namePart type="family">Tomeh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mahmoud</namePart>
        <namePart type="family">El-Haj</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wajdi</namePart>
        <namePart type="family">Zaghouani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Valencia, Spain</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <!-- The ampersand in the abstract is written as an entity reference; a bare one is not well-formed XML. -->
    <abstract>This paper sheds light on a system that would be able to diacritize Arabic texts automatically (SHAKKIL). In this system, the diacritization problem will be handled through two levels; morphological and syntactic processing levels. The adopted morphological disambiguation algorithm depends on four layers; Uni-morphological form layer, rule-based morphological disambiguation layer, statistical-based disambiguation layer and Out Of Vocabulary (OOV) layer. The adopted syntactic disambiguation algorithms is concerned with detecting the case ending diacritics depending on a rule based approach simulating the shallow parsing technique. This will be achieved using an annotated corpus for extracting the Arabic linguistic rules, building the language models and testing the system output. This system is considered as a good trial of the interaction between rule-based approach and statistical approach, where the rules can help the statistics in detecting the right diacritization and vice versa. At this point, the morphological Word Error Rate (WER) is 4.56% while the morphological Diacritic Error Rate (DER) is 1.88% and the syntactic WER is 9.36%. The best WER is 14.78% compared to the best-published results, of (Abandah, 2015); 11.68%, (Rashwan, et al., 2015); 12.90% and (Metwally, Rashwan, &amp; Atiya, 2016); 13.70%.</abstract>
    <identifier type="citekey">fashwan-alansary-2017-shakkil</identifier>
    <identifier type="doi">10.18653/v1/W17-1311</identifier>
    <location>
      <url>https://aclanthology.org/W17-1311</url>
    </location>
    <part>
      <date>2017-04</date>
      <extent unit="page">
        <start>84</start>
        <end>93</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T SHAKKIL: An Automatic Diacritization System for Modern Standard Arabic Texts
%A Fashwan, Amany
%A Alansary, Sameh
%Y Habash, Nizar
%Y Diab, Mona
%Y Darwish, Kareem
%Y El-Hajj, Wassim
%Y Al-Khalifa, Hend
%Y Bouamor, Houda
%Y Tomeh, Nadi
%Y El-Haj, Mahmoud
%Y Zaghouani, Wajdi
%S Proceedings of the Third Arabic Natural Language Processing Workshop
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F fashwan-alansary-2017-shakkil
%X This paper sheds light on a system that would be able to diacritize Arabic texts automatically (SHAKKIL). In this system, the diacritization problem will be handled through two levels; morphological and syntactic processing levels. The adopted morphological disambiguation algorithm depends on four layers; Uni-morphological form layer, rule-based morphological disambiguation layer, statistical-based disambiguation layer and Out Of Vocabulary (OOV) layer. The adopted syntactic disambiguation algorithms is concerned with detecting the case ending diacritics depending on a rule based approach simulating the shallow parsing technique. This will be achieved using an annotated corpus for extracting the Arabic linguistic rules, building the language models and testing the system output. This system is considered as a good trial of the interaction between rule-based approach and statistical approach, where the rules can help the statistics in detecting the right diacritization and vice versa. At this point, the morphological Word Error Rate (WER) is 4.56% while the morphological Diacritic Error Rate (DER) is 1.88% and the syntactic WER is 9.36%. The best WER is 14.78% compared to the best-published results, of (Abandah, 2015); 11.68%, (Rashwan, et al., 2015); 12.90% and (Metwally, Rashwan, & Atiya, 2016); 13.70%.
%R 10.18653/v1/W17-1311
%U https://aclanthology.org/W17-1311
%U https://doi.org/10.18653/v1/W17-1311
%P 84-93
Markdown (Informal)
[SHAKKIL: An Automatic Diacritization System for Modern Standard Arabic Texts](https://aclanthology.org/W17-1311) (Fashwan & Alansary, WANLP 2017)
ACL