% QCRI system description paper from the VarDial3 workshop (COLING 2016).
% Acronyms, proper nouns and the math-mode symbol in the title are wrapped in
% braces so that sentence-casing bibliography styles keep them intact; the
% month uses the standard three-letter macro so styles can format it.
@InProceedings{eldesouki-EtAl:2016:VarDial3,
  author    = {Eldesouki, Mohamed and Dalvi, Fahim and Sajjad, Hassan and Darwish, Kareem},
  title     = {{QCRI} {$@$} {DSL} 2016: Spoken {Arabic} Dialect Identification Using Textual Features},
  booktitle = {Proceedings of the Third Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial3)},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {221--226},
  abstract  = {The paper describes the QCRI submissions to the task of automatic Arabic
	dialect classification into 5 Arabic variants, namely Egyptian, Gulf,
	Levantine, North-African, and Modern Standard Arabic (MSA). The training data
	is relatively small and is automatically generated from an ASR system. To avoid
	over-fitting on such small data, we carefully selected and designed the
	features to capture the morphological essence of the different dialects. We
	submitted four runs to the Arabic sub-task. For all runs, we used a combined
	feature vector of character bi-grams, tri-grams, 4-grams, and 5-grams. We tried
	several machine-learning algorithms, namely Logistic Regression, Naive Bayes,
	Neural Networks, and Support Vector Machines (SVM) with linear and string
	kernels. However, our submitted runs used SVM with a linear kernel. In the
	closed submission, we got the best accuracy of 0.5136 and the third best
	weighted F1 score, with a difference less than 0.002 from the highest score.},
  url       = {http://aclweb.org/anthology/W16-4828}
}

