% Workshop paper W17-1308 (ACL Anthology). Acronyms and proper nouns in the
% title are brace-protected so sentence-casing styles keep their capitals;
% the percent sign in the abstract is escaped so it survives into the .bbl.
@InProceedings{elballouli-EtAl:2017:W17-13,
  author    = {El Ballouli, Rim and El-Hajj, Wassim and Ghandour, Ahmad and Elbassuoni, Shady and Hajj, Hazem and Shaban, Khaled},
  title     = {{CAT}: Credibility Analysis of {Arabic} Content on {Twitter}},
  booktitle = {Proceedings of the Third Arabic Natural Language Processing Workshop},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {62--71},
  abstract  = {Data generated on Twitter has become a rich source for various
               data mining tasks. Those data analysis tasks that are dependent
               on the tweet semantics, such as sentiment analysis, emotion
               mining, and rumor detection among others, suffer considerably
               if the tweet is not credible, not real, or spam. In this paper,
               we perform an extensive analysis on credibility of Arabic
               content on Twitter. We also build a classification model (CAT)
               to automatically predict the credibility of a given Arabic
               tweet. Of particular originality is the inclusion of features
               extracted directly or indirectly from the author's profile and
               timeline. To train and test CAT, we annotated for credibility a
               data set of 9,000 Arabic tweets that are topic independent. CAT
               achieved consistent improvements in predicting the credibility
               of the tweets when compared to several baselines and when
               compared to the state-of-the-art approach with an improvement
               of 21\% in weighted average F-measure. We also conducted
               experiments to highlight the importance of the user-based
               features as opposed to the content-based features. We conclude
               our work with a feature reduction experiment that highlights
               the best indicative features of credibility.},
  url       = {http://www.aclweb.org/anthology/W17-1308}
}