% Baly et al. (2017): workshop paper in the Proceedings of the Third Arabic
% Natural Language Processing Workshop (Anthology ID W17-1314, per the url field).
% Notes for maintainers:
%   - month uses the predefined BibTeX macro (unbraced) so styles can format it.
%   - Proper nouns in the title are brace-protected against sentence-casing styles.
%   - The abstract is kept ASCII-only for compatibility with classic 8-bit BibTeX.
@InProceedings{baly-EtAl:2017:W17-13,
  author    = {Baly, Ramy and Badaro, Gilbert and El-Khoury, Georges and Moukalled, Rawan and Aoun, Rita and Hajj, Hazem and El-Hajj, Wassim and Habash, Nizar and Shaban, Khaled},
  title     = {A Characterization Study of {Arabic} {Twitter} Data with a Benchmarking for State-of-the-Art Opinion Mining Models},
  booktitle = {Proceedings of the Third Arabic Natural Language Processing Workshop},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {110--118},
  abstract  = {Opinion mining in Arabic is a challenging task given the rich morphology of
    the language. The task becomes more challenging when it is applied to Twitter
    data, which contains additional sources of noise, such as the use of
    unstandardized dialectal variations, the nonconformation to grammatical rules,
    the use of Arabizi and code-switching, and the use of non-text objects such as
    images and URLs to express opinion. In this paper, we perform an analytical
    study to observe how such linguistic phenomena
    vary across different Arab regions. This study of Arabic Twitter
    characterization aims at providing better understanding of Arabic Tweets, and
    fostering advanced research on the topic. Furthermore, we explore the
    performance of the two schools of machine learning on Arabic Twitter, namely
    the feature engineering approach and the deep learning approach. We consider
    models that have achieved state-of-the-art performance for opinion mining in
    English. Results highlight the advantages of using deep learning-based models,
    and confirm the importance of using morphological abstractions to address
    Arabic's complex morphology.},
  url       = {http://www.aclweb.org/anthology/W17-1314}
}

