% Workshop paper; ACL Anthology ID W17-1302 (see url). Citation key kept as exported so existing \cite commands still resolve.
@InProceedings{darwish-mubarak-abdelali:2017:W17-13,
  author    = {Darwish, Kareem and Mubarak, Hamdy and Abdelali, Ahmed},
  title     = {{Arabic} Diacritization: Stats, Rules, and Hacks},
  booktitle = {Proceedings of the Third Arabic Natural Language Processing Workshop},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {9--17},
  abstract  = {In this paper, we present a new and fast state-of-the-art Arabic diacritizer
               that guesses the diacritics of words and then their case endings. We employ a
               Viterbi decoder at word-level with back-off to stem, morphological patterns,
               and transliteration and sequence labeling based diacritization of named
               entities. For case endings, we use Support Vector Machine (SVM) based ranking
               coupled with morphological patterns and linguistic rules to properly guess case
               endings. We achieve a low word level diacritization error of 3.29\% and 12.77\%
               without and with case endings respectively on a new multi-genre free of
               copyright test set. We are making the diacritizer available for free for
               research purposes.},
  url       = {http://www.aclweb.org/anthology/W17-1302}
}

