@proceedings{ALW2:2018,
  editor    = {Fišer, Darja  and  Huang, Ruihong  and  Prabhakaran, Vinodkumar  and  Voigt, Rob  and  Waseem, Zeerak  and  Wernimont, Jacqueline},
  title     = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-51},
}

@inproceedings{mishra-yannakoudakis-shutova:2018:ALW2,
  author    = {Mishra, Pushkar  and  Yannakoudakis, Helen  and  Shutova, Ekaterina},
  title     = {Neural Character-based Composition Models for Abuse Detection},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {1--10},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5101},
  abstract  = {The advent of social media in recent years has fed into some highly undesirable phenomena such as proliferation of offensive language, hate speech, sexist remarks, etc. on the Internet. In light of this, there have been several efforts to automate the detection and moderation of such abusive content. However, deliberate obfuscation of words by users to evade detection poses a serious challenge to the effectiveness of these efforts. The current state of the art approaches to abusive language detection, based on recurrent neural networks, do not explicitly address this problem and resort to a generic OOV (out of vocabulary) embedding for unseen words. However, in using a single embedding for all unseen words we lose the ability to distinguish between obfuscated and non-obfuscated or rare words. In this paper, we address this problem by designing a model that can compose embeddings for unseen words. We experimentally demonstrate that our approach significantly advances the current state of the art in abuse detection on datasets from two different domains, namely Twitter and Wikipedia talk page.},
}

@inproceedings{degibert-EtAl:2018:ALW2,
  author    = {de Gibert, Ona  and  Perez, Naiara  and  García Pablos, Aitor  and  Cuadros, Montse},
  title     = {Hate Speech Dataset from a White Supremacy Forum},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {11--20},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5102},
  abstract  = {Hate speech is commonly defined as any communication that disparages a target group of people based on some characteristic such as race, colour, ethnicity, gender, sexual orientation, nationality, religion, or other characteristic.},
}

@inproceedings{gunasekara-nejadgholi:2018:ALW2,
  author    = {Gunasekara, Isuru  and  Nejadgholi, Isar},
  title     = {A Review of Standard Text Classification Practices for Multi-label Toxicity Identification of Online Content},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {21--25},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5103},
  abstract  = {Language toxicity identification presents a gray area in the ethical debate surrounding freedom of speech and censorship. Today's social media landscape is littered with unfiltered content that can be anywhere from slightly abusive to hate inducing. In response, we focused on training a multi-label classifier to detect both the type and level of toxicity in online content. This content is typically colloquial and conversational in style. Its classification therefore requires huge amounts of annotated data due to its variability and inconsistency. We compare standard methods of text classification in this task. A conventional one-vs-rest SVM classifier with character and word level frequency-based representation of text reaches 0.9763 ROC AUC score. We demonstrated that leveraging more advanced technologies such as word embeddings, recurrent neural networks, attention mechanism, stacking of classifiers and semi-supervised training can improve the ROC AUC score of classification to 0.9862. We suggest that in order to choose the right model one has to consider the accuracy of models as well as inference complexity based on the application.},
}

% NOTE(review): the abstract of the next entry is cut off mid-sentence in this export (it ends on a comma); restore the full text from the published paper.
@inproceedings{kshirsagar-EtAl:2018:ALW2,
  author    = {Kshirsagar, Rohan  and  Cukuvac, Tyrus  and  McKeown, Kathy  and  McGregor, Susan},
  title     = {Predictive Embeddings for Hate Speech Detection on {Twitter}},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {26--32},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5104},
  abstract  = {We present a neural-network based approach to classifying online hate speech in general,},
}

@inproceedings{vanaken-EtAl:2018:ALW2,
  author    = {van Aken, Betty  and  Risch, Julian  and  Krestel, Ralf  and  Löser, Alexander},
  title     = {Challenges for Toxic Comment Classification: An In-Depth Error Analysis},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {33--42},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5105},
  abstract  = {Toxic comment classification has become an active research field with many recently proposed approaches. However, while these approaches address some of the task’s challenges others still remain unsolved and directions for further research are needed. To this end, we compare different deep learning and shallow approaches on a new, large comment dataset and propose an ensemble that outperforms all individual models. Further, we validate our findings on a second dataset. The results of the ensemble enable us to perform an extensive error analysis, which reveals open challenges for state-of-the-art methods and directions towards pending future research. These challenges include missing paradigmatic context and inconsistent dataset labels.},
}

@inproceedings{singh-EtAl:2018:ALW2,
  author    = {Singh, Vinay  and  Varshney, Aman  and  Akhtar, Syed Sarfaraz  and  Vijay, Deepanshu  and  Shrivastava, Manish},
  title     = {Aggression Detection on Social Media Text Using Deep Neural Networks},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {43--50},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5106},
  abstract  = {In the past few years, bully and aggressive posts on social media have grown significantly, causing serious consequences for victims/users of all demographics. Majority of the work in this field has been done for English only. In this paper, we introduce a deep learning based classification system for Facebook posts and comments of Hindi-English Code-Mixed text to detect the aggressive behaviour of/towards users. Our work focuses on text from users majorly in the Indian Subcontinent.},
}

@inproceedings{sprugnoli-EtAl:2018:ALW2,
  author    = {Sprugnoli, Rachele  and  Menini, Stefano  and  Tonelli, Sara  and  Oncini, Filippo  and  Piras, Enrico},
  title     = {Creating a {WhatsApp} Dataset to Study Pre-teen Cyberbullying},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {51--59},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5107},
  abstract  = {Although WhatsApp is used by teenagers as one major channel of cyberbullying, such interactions remain invisible due to the app privacy policies that do not allow ex-post data collection. Indeed, most of the information on these phenomena rely on surveys regarding self-reported data.},
}

@inproceedings{vec-EtAl:2018:ALW2,
  author    = {Švec, Andrej  and  Pikuliak, Matúš  and  Simko, Marian  and  Bielikova, Maria},
  title     = {Improving Moderation of Online Discussions via Interpretable Neural Models},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {60--65},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5108},
  abstract  = {Growing amount of comments make online discussions difficult to moderate by human moderators only. Antisocial behavior is a common occurrence that often discourages other users from participating in discussion. We propose a neural network based method that partially automates the moderation process. It consists of two steps. First, we detect inappropriate comments for moderators to see. Second, we highlight inappropriate parts within these comments to make the moderation faster. We evaluated our method on data from a major Slovak news discussion platform.},
}

@inproceedings{caines-EtAl:2018:ALW2,
  author    = {Caines, Andrew  and  Pastrana, Sergio  and  Hutchings, Alice  and  Buttery, Paula},
  title     = {Aggressive Language in an Online Hacking Forum},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {66--74},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5109},
  abstract  = {We probe the heterogeneity in levels of abusive language in different sections of the Internet, using an annotated corpus of Wikipedia page edit comments to train a binary classifier for abuse detection. Our test data come from the CrimeBB Corpus of hacking-related forum posts and we find that (a) forum interactions are rarely abusive, (b) the abusive language which does exist tends to be relatively mild compared to that found in the Wikipedia comments domain, and tends to involve aggressive posturing rather than hate speech or threats of violence. We observe that the purpose of conversations in online forums tend to be more constructive and informative than those in Wikipedia page edit comments which are geared more towards adversarial interactions, and that this may explain the lower levels of abuse found in our forum data than in Wikipedia comments. Further work remains to be done to compare these results with other inter-domain classification experiments, and to understand the impact of aggressive language in forum conversations.},
}

@inproceedings{fehnunsvg-gambck:2018:ALW2,
  author    = {Fehn Unsvåg, Elise  and  Gambäck, Björn},
  title     = {The Effects of User Features on {Twitter} Hate Speech Detection},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {75--85},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5110},
  abstract  = {The paper investigates the potential effects user features have on hate speech classification. A quantitative analysis of Twitter data was conducted to better understand user characteristics, but no correlations were found between hateful text and the characteristics of the users who had posted it. However, experiments with a hate speech classifier based on datasets from three different languages showed that combining certain user features with textual features gave slight improvements of classification performance. While the incorporation of user features resulted in varying impact on performance for the different datasets used, user network-related features provided the most consistent improvements.},
}

@inproceedings{wang:2018:ALW2,
  author    = {Wang, Cindy},
  title     = {Interpreting Neural Network Hate Speech Classifiers},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {86--92},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5111},
  abstract  = {Neural network hate speech classifiers outperform other methods, but the prevalence of hate speech necessitates better interpretability for automated detection systems. We propose several techniques to visualize and understand the domain-specific semantic meaning of a network's internal structures.},
}

@inproceedings{magu-luo:2018:ALW2,
  author    = {Magu, Rijul  and  Luo, Jiebo},
  title     = {Determining Code Words in Euphemistic Hate Speech Using Word Embedding Networks},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {93--100},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5112},
  abstract  = {While analysis of online explicit abusive language detection has lately seen an ever-increasing focus, implicit abuse detection remains a largely unexplored space. We carry out a study on a subcategory of implicit hate: euphemistic hate speech. We propose a method to assist in identifying unknown euphemisms (or code words) given a set of hateful tweets containing a known code word. Our approach leverages word embeddings and network analysis (through centrality measures and community detection) in a manner that can be generalized to identify euphemisms across contexts- not just hate speech.},
}

@inproceedings{lee-yoon-jung:2018:ALW2,
  author    = {Lee, Younghun  and  Yoon, Seunghyun  and  Jung, Kyomin},
  title     = {Comparative Studies of Detecting Abusive Language on {Twitter}},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {101--106},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5113},
  abstract  = {The context-dependent nature of online aggression makes annotating large collections of data extremely difficult. Previously studied datasets in abusive language detection have been insufficient in size to efficiently train deep learning models. Recently, Hate and Abusive Speech on Twitter, a dataset much greater in size and reliability, has been released. However, this dataset has not been comprehensively studied to its potential. In this paper, we conduct the first comparative study of various learning models on Hate and Abusive Speech on Twitter, and discuss the possibility of using additional features and context data for improvements. Experimental results show that bidirectional GRU networks trained on word-level features, with Latent Topic Clustering modules, is the most accurate model scoring 0.805 F1.},
}

% NOTE(review): the abstract of the next entry is cut off mid-sentence in this export; restore the full text from the published paper.
@inproceedings{sharifirad-jafarpour-matwin:2018:ALW2,
  author    = {Sharifirad, Sima  and  Jafarpour, Borna  and  Matwin, Stan},
  title     = {Boosting Text Classification Performance on Sexist Tweets by Text Augmentation and Text Generation Using a Combination of Knowledge Graphs},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {107--114},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5114},
  abstract  = {Text classification models have been heavily},
}

@inproceedings{sahlgren-isbister-olsson:2018:ALW2,
  author    = {Sahlgren, Magnus  and  Isbister, Tim  and  Olsson, Fredrik},
  title     = {Learning Representations for Detecting Abusive Language},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {115--123},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5115},
  abstract  = {This paper discusses the question whether it is possible to learn a generic representation that is useful for detecting various types of abusive language. The approach is inspired by recent advances in transfer learning and word embeddings, and we learn representations from two different datasets containing various degrees of abusive language. We compare the learned representation with two standard approaches; one based on lexica, and one based on data-specific $n$-grams. Our experiments show that learned representations do contain useful information that can be used to improve detection performance when training data is limited.},
}

@inproceedings{ljubei-erjavec-fier:2018:ALW2,
  author    = {Ljubešić, Nikola  and  Erjavec, Tomaž  and  Fišer, Darja},
  title     = {Datasets of {Slovene} and {Croatian} Moderated News Comments},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {124--131},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5116},
  abstract  = {This paper presents two large newly constructed datasets of moderated news comments from two highly popular online news portals in the respective countries: the Slovene RTV MCC and the Croatian 24sata. The datasets are analyzed by performing manual annotation of the types of the content which have been deleted by moderators and by investigating deletion trends among users and threads. Next, initial experiments on automatically detecting the deleted content in the datasets are presented. Both datasets are published in encrypted form, to enable others to perform experiments on detecting content to be deleted without revealing potentially inappropriate content. Finally, the baseline classification models trained on the non-encrypted datasets are disseminated as well to enable real-world use.},
}

@inproceedings{karan-najder:2018:ALW2,
  author    = {Karan, Mladen  and  Šnajder, Jan},
  title     = {Cross-Domain Detection of Abusive Language Online},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {132--137},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5117},
  abstract  = {We investigate to what extent the models trained to detect general abusive language generalize between different datasets labeled with different abusive language types. To this end, we compare the cross-domain performance of simple classification models on nine different datasets, finding that the models fail to generalize to out-domain datasets and that having at least some in-domain data is important. We also show that using the frustratingly simple domain adaptation (Daume III, 2007) in most cases improves the results over in-domain training, specially when used to augment a smaller dataset with a larger one.},
}

@inproceedings{mathur-EtAl:2018:ALW2,
  author    = {Mathur, Puneet  and  Sawhney, Ramit  and  Ayyar, Meghna  and  Shah, Rajiv},
  title     = {Did you offend me? {Classification} of Offensive Tweets in {Hinglish} Language},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {138--148},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5118},
  abstract  = {The use of code-switched languages (e.g., Hinglish, which is derived by the blending of Hindi with the English language) is getting much popular on Twitter due to their ease of communication in native languages. However, spelling variations and absence of grammar rules introduce ambiguity and make it difficult to understand the text automatically. This paper presents the Multi-Input Multi-Channel Transfer Learning based model (MIMCT) to detect offensive (hate speech or abusive) Hinglish tweets from the proposed Hinglish Offensive Tweet (HOT) dataset using transfer learning coupled with multiple feature inputs. Specifically, it takes multiple primary word embedding along with secondary extracted features as inputs to train a multi-channel CNN-LSTM architecture that has been pre-trained on English tweets through transfer learning. The proposed MIMCT model outperforms the baseline supervised classification models, transfer learning based CNN and LSTM models to establish itself as the state of the art in the unexplored domain of Hinglish offensive text classification.},
}

@inproceedings{wu-kambhatla-sarkar:2018:ALW2,
  author    = {Wu, Zhelun  and  Kambhatla, Nishant  and  Sarkar, Anoop},
  title     = {Decipherment for Adversarial Offensive Language Detection},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {149--159},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5119},
  abstract  = {Automated filters are commonly used by online services to stop users from sending age-inappropriate, bullying messages, or asking others to expose personal information. Previous work has focused on rules or classifiers to detect and filter offensive messages, but these are vulnerable to cleverly disguised plaintext and unseen expressions especially in an adversarial setting where the users can repeatedly try to bypass the filter. In this paper, we model the disguised messages as if they are produced by encrypting the original message using an invented cipher. We apply automatic decipherment techniques to decode the disguised malicious text, which can be then filtered using rules or classifiers. We provide experimental results on three different datasets and show that decipherment is an effective tool for this task.},
}

@inproceedings{castelle:2018:ALW2,
  author    = {Castelle, Michael},
  title     = {The Linguistic Ideologies of Deep Abusive Language Classification},
  booktitle = {Proceedings of the 2nd Workshop on Abusive Language Online (ALW2)},
  year      = {2018},
  month     = {October},
  pages     = {160--170},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-5120},
  abstract  = {This paper brings together theories from sociolinguistics and linguistic anthropology to critically evaluate the so-called “language ideologies” — the set of beliefs and ways of speaking about language — in the practices of abusive language classification in modern machine learning-based NLP. This argument is made at both a conceptual and empirical level, as we review approaches to abusive language from different fields, and use two neural network methods to analyze three datasets developed for abusive language classification tasks (drawn from Wikipedia, Facebook, and StackOverflow). By evaluating and comparing these results, we argue for the importance of incorporating theories of pragmatics and metapragmatics into both the design of classification tasks as well as in ML architectures.},
}