% Simaki et al., RANLP 2017: identifying authors' national variety of English
% in social media texts. (Text outside an @entry is ignored by BibTeX.)
@InProceedings{simaki-EtAl:2017:RANLP,
  author    = {Simaki, Vasiliki and Simakis, Panagiotis and Paradis, Carita and Kerren, Andreas},
  title     = {Identifying the Authors' National Variety of {English} in Social Media Texts},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  pages     = {671--678},
  abstract  = {In this paper, we present a study for the identification of authors' national
               variety of English in texts from social media. In data from Facebook and
               Twitter, information about the author's social profile is annotated, and the
               national English variety (US, UK, AUS, CAN, NNS) that each author uses is
               attributed. We tested four feature types: formal linguistic features, POS
               features, lexicon-based features related to the different varieties, and
               data-based features from each English variety. We used various machine learning
               algorithms for the classification experiments, and we implemented a feature
               selection process. The classification accuracy achieved, when the 31 highest
               ranked features were used, was up to 77.32\%. The experimental results are
               evaluated, and the efficacy of the ranked features discussed.},
  doi       = {10.26615/978-954-452-049-6_086},
  url       = {https://doi.org/10.26615/978-954-452-049-6_086}
}

