% Workshop paper by Cindy Wang in the ALW2 (2018) proceedings, pages 86--92.
% The month field uses the predefined macro (oct) so the style controls its rendering.
% NOTE(review): no doi field is present; if a DOI is registered for this paper,
% add it, since the url alone may rot. Verify before adding.
@InProceedings{wang:2018:ALW2,
  author    = {Wang, Cindy},
  title     = {Interpreting Neural Network Hate Speech Classifiers},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online ({ALW2})},
  month     = oct,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  pages     = {86--92},
  abstract  = {Neural network hate speech classifiers outperform other methods, but the prevalence of hate speech necessitates better interpretability for automated detection systems. We propose several techniques to visualize and understand the domain-specific semantic meaning of a network's internal structures.},
  url       = {http://www.aclweb.org/anthology/W18-5111},
}

