@InProceedings{wu-EtAl:2019:S19-2,
  author    = {Wu, Zhenghao  and  Zheng, Hao  and  Wang, Jianming  and  Su, Weifeng  and  Fong, Jefferson},
  title     = {BNU-HKBU UIC NLP Team 2 at SemEval-2019 Task 6: Detecting Offensive Language Using BERT model},
  booktitle = {Proceedings of the 13th International Workshop on Semantic Evaluation},
  month     = jun,
  year      = {2019},
  address   = {Minneapolis, Minnesota, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {551--555},
  abstract  = {In this study we deal with the problem of identifying and categorizing offensive language in social media. Our group, BNU-HKBU UIC NLP Team2, use supervised classification along with multiple version of data generated by different ways of pre-processing the data. We then use the state-of-the-art model Bidirectional Encoder Representations from Transformers, or BERT (Devlin et al, 2018), to capture linguistic, syntactic and semantic features. Long range dependencies between each part of a sentence can be captured by BERT's bidirectional encoder representations. Our results show 85.12\% accuracy and 80.57\% F1 scores in Subtask A (offensive language identification), 87.92\% accuracy and 50\% F1 scores in Subtask B (categorization of offense types), and 69.95\% accuracy and 50.47\% F1 score in Subtask C (offense target identification). Analysis of the results shows that distinguishing between targeted and untargeted offensive language is not a simple task. More work needs to be done on the unbalance data problem in Subtasks B and C. Some future work is also discussed.},
  url       = {http://www.aclweb.org/anthology/S19-2099},
}

