% Workshop paper (LT4DH, held with COLING 2016) describing the Shamela
% historical Arabic corpus. Text outside an entry is ignored by BibTeX.
% Braces inside title/booktitle protect proper nouns and acronyms from
% style-driven sentence casing; month uses the predefined `dec` macro so
% styles can abbreviate or localise it.
@InProceedings{belinkov-EtAl:2016:LT4DH,
  author    = {Belinkov, Yonatan and Magidow, Alexander and Romanov, Maxim and Shmidman, Avi and Koppel, Moshe},
  title     = {{Shamela}: A Large-Scale Historical {Arabic} Corpus},
  booktitle = {Proceedings of the Workshop on Language Technology Resources and Tools for Digital Humanities ({LT4DH})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {45--53},
  abstract  = {Arabic is a widely-spoken language with a rich and long history spanning more
               than fourteen centuries. Yet existing Arabic corpora largely focus on the
               modern period or lack sufficient diachronic information. We develop a
               large-scale, historical corpus of Arabic of about 1 billion words from diverse
               periods of time. We clean this corpus, process it with a morphological
               analyzer, and enhance it by detecting parallel passages and automatically
               dating undated texts. We demonstrate its utility with selected case-studies in
               which we show its application to the digital humanities.},
  url       = {http://aclweb.org/anthology/W16-4007},
}

