% Workshop paper, ACL Anthology record W17-1321 (see the url and doi fields).
% Only the proper noun Arabic is brace-protected (as a whole word) so that
% sentence-casing styles keep its capital without breaking kerning.
% NOTE(review): address presumably holds the workshop venue rather than the
% publisher's city, as exported upstream; confirm before normalising.
@inproceedings{al-badrashiny-etal-2017-layered,
  title     = {A Layered Language Model based Hybrid Approach to Automatic Full Diacritization of {Arabic}},
  author    = {Al-Badrashiny, Mohamed and
               Hawwari, Abdelati and
               Diab, Mona},
  editor    = {Habash, Nizar and
               Diab, Mona and
               Darwish, Kareem and
               El-Hajj, Wassim and
               Al-Khalifa, Hend and
               Bouamor, Houda and
               Tomeh, Nadi and
               El-Haj, Mahmoud and
               Zaghouani, Wajdi},
  booktitle = {Proceedings of the Third {Arabic} Natural Language Processing Workshop},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W17-1321},
  doi       = {10.18653/v1/W17-1321},
  pages     = {177--184},
  abstract  = {In this paper we present a system for automatic Arabic text diacritization using three levels of analysis granularity in a layered back off manner. We build and exploit diacritized language models (LM) for each of three different levels of granularity: surface form, morphologically segmented into prefix/stem/suffix, and character level. For each of the passes, we use Viterbi search to pick the most probable diacritization per word in the input. We start with the surface form LM, followed by the morphological level, then finally we leverage the character level LM. Our system outperforms all of the published systems evaluated against the same training and test data. It achieves a 10.87{\%} WER for complete full diacritization including lexical and syntactic diacritization, and 3.0{\%} WER for lexical diacritization, ignoring syntactic diacritization.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, keyed al-badrashiny-etal-2017-layered. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="al-badrashiny-etal-2017-layered">
    <!-- Title of the paper itself -->
    <titleInfo>
      <title>A Layered Language Model based Hybrid Approach to Automatic Full Diacritization of Arabic</title>
    </titleInfo>
    <!-- Authors of the paper (role: author) -->
    <name type="personal">
      <namePart type="given">Mohamed</namePart>
      <namePart type="family">Al-Badrashiny</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Abdelati</namePart>
      <namePart type="family">Hawwari</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mona</namePart>
      <namePart type="family">Diab</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2017-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Third Arabic Natural Language Processing Workshop</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nizar</namePart>
        <namePart type="family">Habash</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mona</namePart>
        <namePart type="family">Diab</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kareem</namePart>
        <namePart type="family">Darwish</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wassim</namePart>
        <namePart type="family">El-Hajj</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hend</namePart>
        <namePart type="family">Al-Khalifa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nadi</namePart>
        <namePart type="family">Tomeh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mahmoud</namePart>
        <namePart type="family">El-Haj</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wajdi</namePart>
        <namePart type="family">Zaghouani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Valencia, Spain</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper we present a system for automatic Arabic text diacritization using three levels of analysis granularity in a layered back off manner. We build and exploit diacritized language models (LM) for each of three different levels of granularity: surface form, morphologically segmented into prefix/stem/suffix, and character level. For each of the passes, we use Viterbi search to pick the most probable diacritization per word in the input. We start with the surface form LM, followed by the morphological level, then finally we leverage the character level LM. Our system outperforms all of the published systems evaluated against the same training and test data. It achieves a 10.87% WER for complete full diacritization including lexical and syntactic diacritization, and 3.0% WER for lexical diacritization, ignoring syntactic diacritization.</abstract>
    <!-- Identifiers and access location -->
    <identifier type="citekey">al-badrashiny-etal-2017-layered</identifier>
    <identifier type="doi">10.18653/v1/W17-1321</identifier>
    <location>
      <url>https://aclanthology.org/W17-1321</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2017-04</date>
      <extent unit="page">
        <start>177</start>
        <end>184</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Layered Language Model based Hybrid Approach to Automatic Full Diacritization of Arabic
%A Al-Badrashiny, Mohamed
%A Hawwari, Abdelati
%A Diab, Mona
%Y Habash, Nizar
%Y Diab, Mona
%Y Darwish, Kareem
%Y El-Hajj, Wassim
%Y Al-Khalifa, Hend
%Y Bouamor, Houda
%Y Tomeh, Nadi
%Y El-Haj, Mahmoud
%Y Zaghouani, Wajdi
%S Proceedings of the Third Arabic Natural Language Processing Workshop
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F al-badrashiny-etal-2017-layered
%X In this paper we present a system for automatic Arabic text diacritization using three levels of analysis granularity in a layered back off manner. We build and exploit diacritized language models (LM) for each of three different levels of granularity: surface form, morphologically segmented into prefix/stem/suffix, and character level. For each of the passes, we use Viterbi search to pick the most probable diacritization per word in the input. We start with the surface form LM, followed by the morphological level, then finally we leverage the character level LM. Our system outperforms all of the published systems evaluated against the same training and test data. It achieves a 10.87% WER for complete full diacritization including lexical and syntactic diacritization, and 3.0% WER for lexical diacritization, ignoring syntactic diacritization.
%R 10.18653/v1/W17-1321
%U https://aclanthology.org/W17-1321
%U https://doi.org/10.18653/v1/W17-1321
%P 177-184
Markdown (Informal)
[A Layered Language Model based Hybrid Approach to Automatic Full Diacritization of Arabic](https://aclanthology.org/W17-1321) (Al-Badrashiny et al., WANLP 2017)
ACL