@InProceedings{addawood-EtAl:2017:NLPandCSS,
  author    = {Addawood, Aseel  and  Rezapour, Rezvaneh  and  Abdar, Omid  and  Diesner, Jana},
  title     = {Telling Apart Tweets Associated with Controversial versus Non-Controversial Topics},
  booktitle = {Proceedings of the Second Workshop on {NLP} and Computational Social Science},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {32--41},
  abstract  = {In this paper, we evaluate the predictability of tweets associated with
    controversial versus non-controversial topics. As a first step, we
    crowd-sourced the scoring of a predefined set of topics on a Likert scale from
    non-controversial to controversial. Our feature set entails and goes beyond
    sentiment features, e.g., by leveraging empathic language and other features
    that have been previously used but are new for this particular study. We find
    focusing on the structural characteristics of tweets to be beneficial for this
    task. Using a combination of emphatic, language-specific, and Twitter-specific
    features for supervised learning resulted in 87\% accuracy (F1) for
    cross-validation of the training set and 63.4\% accuracy when using the test
    set. Our analysis shows that features specific to Twitter or social media, in
    general, are more prevalent in tweets on controversial topics than in
    non-controversial ones. To test the premise of the paper, we conducted two
    additional sets of experiments, which led to mixed results. This finding will
    inform our future investigations into the relationship between language use on
    social media and the perceived controversiality of topics.},
  url       = {http://www.aclweb.org/anthology/W17-2905}
}

