% Workshop paper (ACL Anthology ID W17-1321) on automatic full diacritization of Arabic text.
% Notes: month uses the predefined macro; the proper noun in the title is brace-protected
% against style downcasing; percent signs in the abstract are escaped for LaTeX.
@InProceedings{albadrashiny-hawwari-diab:2017:W17-13,
  author    = {Al-Badrashiny, Mohamed and Hawwari, Abdelati and Diab, Mona},
  title     = {A Layered Language Model based Hybrid Approach to Automatic Full Diacritization of {Arabic}},
  booktitle = {Proceedings of the Third Arabic Natural Language Processing Workshop},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {177--184},
  abstract  = {In this paper we present a system for automatic Arabic text diacritization
               using three levels of analysis granularity in a layered back off manner. We
               build and exploit diacritized language models (LM) for each of three different
               levels of granularity: surface form, morphologically segmented into
               prefix/stem/suffix, and character level. For each of the passes, we use
               Viterbi search to pick the most probable diacritization per word in the input.
               We start with the surface form LM, followed by the morphological level, then
               finally we leverage the character level LM. Our system outperforms all of the
               published systems evaluated against the same training and test data. It
               achieves a 10.87\% WER for complete full diacritization including lexical and
               syntactic diacritization, and 3.0\% WER for lexical diacritization, ignoring
               syntactic diacritization.},
  url       = {http://www.aclweb.org/anthology/W17-1321},
}

