% Tw-StAR description paper for SemEval-2019 Task 5 (HatEval), subtask A; ACL Anthology ID S19-2090 (see url).
% Non-ASCII characters are written as LaTeX escapes so the entry works with both classic BibTeX and Biber.
@inproceedings{mulki-EtAl:2019:S19-2,
  author    = {Mulki, Hala and Bechikh Ali, Chedi and Haddad, Hatem and Babao{\u{g}}lu, Ismail},
  title     = {{Tw-StAR} at {SemEval-2019} Task 5: N-gram embeddings for Hate Speech Detection in Multilingual Tweets},
  booktitle = {Proceedings of the 13th International Workshop on Semantic Evaluation},
  month     = jun,
  year      = {2019},
  address   = {Minneapolis, Minnesota, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {503--507},
  abstract  = {In this paper, we describe our contribution in SemEval-2019: subtask A of task 5 ``Multilingual detection of hate speech against immigrants and women in Twitter (HatEval)''. We developed two hate speech detection model variants through Tw-StAR framework. While the first model adopted one-hot encoding ngrams to train an NB classifier, the second generated and learned n-gram embeddings within a feedforward neural network. For both models, specific terms, selected via MWT patterns, were tagged in the input data. With two feature types employed, we could investigate the ability of n-gram embeddings to rival one-hot n-grams. Our results showed that in English, n-gram embeddings outperformed one-hot ngrams. However, representing Spanish tweets by one-hot n-grams yielded a slightly better performance compared to that of n-gram embeddings. The official ranking indicated that Tw-StAR ranked 9th for English and 20th for Spanish.},
  url       = {http://www.aclweb.org/anthology/S19-2090},
}

