<?xml version="1.0" encoding="UTF-8" ?>
<volume id="W17">
  <paper id="3000">
    <title>Proceedings of the First Workshop on Abusive Language Online</title>
    <editor>Zeerak Waseem</editor>
    <editor>Wendy Hui Kyong Chun</editor>
    <editor>Dirk Hovy</editor>
    <editor>Joel Tetreault</editor>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <url>http://www.aclweb.org/anthology/W17-30</url>
    <bibtype>proceedings</bibtype>
    <bibkey>ALW1:2017</bibkey>
  </paper>

  <paper id="3001">
    <title>Dimensions of Abusive Language on Twitter</title>
    <author><first>Isobelle</first><last>Clarke</last></author>
    <author><first>Jack</first><last>Grieve</last></author>
    <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>1&#8211;10</pages>
    <url>http://www.aclweb.org/anthology/W17-3001</url>
    <abstract>In this paper, we use a new categorical form of multidimensional register
	analysis to identify the main dimensions of functional linguistic variation in
	a corpus of abusive language, consisting of racist and sexist Tweets. By
	analysing the use of a wide variety of parts-of-speech and grammatical
	constructions, as well as various features related to Twitter and
	computer-mediated communication, we discover three dimensions of linguistic
	variation in this corpus, which we interpret as being related to the degree of
	interactive, antagonistic and attitudinal language exhibited by individual
	Tweets. We then demonstrate that there is a significant functional difference
	between racist and sexist Tweets, with sexist Tweets tending to be more
	interactive and attitudinal than racist Tweets.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>clarke-grieve:2017:ALW1</bibkey>
  </paper>

  <paper id="3002">
    <title>Constructive Language in News Comments</title>
    <author><first>Varada</first><last>Kolhatkar</last></author>
    <author><first>Maite</first><last>Taboada</last></author>
    <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>11&#8211;17</pages>
    <url>http://www.aclweb.org/anthology/W17-3002</url>
    <abstract>We discuss the characteristics of constructive news comments, and present
	methods to identify them. First, we define the notion of constructiveness.
	Second, we annotate a corpus for constructiveness. Third, we explore whether
	available argumentation corpora can be useful to identify constructiveness in
	news comments. Our model trained on argumentation corpora achieves a top
	accuracy of 72.59% (baseline=49.44%) on our crowd-annotated test data. Finally,
	we examine the relation between constructiveness and toxicity. In our
	crowd-annotated data, 21.42% of the non-constructive comments and 17.89% of the
	constructive comments are toxic, suggesting that non-constructive comments are
	not much more toxic than constructive comments.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>kolhatkar-taboada:2017:ALW1</bibkey>
  </paper>

  <paper id="3003">
    <title>Rephrasing Profanity in Chinese Text</title>
    <author><first>Hui-Po</first><last>Su</last></author>
    <author><first>Zhen-Jie</first><last>Huang</last></author>
    <author><first>Hao-Tsung</first><last>Chang</last></author>
    <author><first>Chuan-Jie</first><last>Lin</last></author>
    <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>18&#8211;24</pages>
    <url>http://www.aclweb.org/anthology/W17-3003</url>
    <abstract>This paper proposes a system that can detect and rephrase profanity in Chinese
	text. Rather than just masking detected profanity, we want to revise the input
	sentence using inoffensive words while keeping its original meaning. 29 such
	rephrasing rules were devised after observing sentences on real-world social
	websites. The overall accuracy of the proposed system is 85.56%.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>su-EtAl:2017:ALW1</bibkey>
  </paper>

  <paper id="3004">
    <title>Deep Learning for User Comment Moderation</title>
    <author><first>John</first><last>Pavlopoulos</last></author>
    <author><first>Prodromos</first><last>Malakasiotis</last></author>
    <author><first>Ion</first><last>Androutsopoulos</last></author>
    <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>25&#8211;35</pages>
    <url>http://www.aclweb.org/anthology/W17-3004</url>
    <abstract>Experimenting with a new dataset of 1.6M user comments from a Greek news portal
	and existing datasets of English Wikipedia comments, we show that an RNN
	outperforms the previous state of the art in moderation. A deep,
	classification-specific attention mechanism further improves the overall
	performance of the RNN. We also compare against a CNN and a word-list baseline,
	considering both fully automatic and semi-automatic moderation.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>pavlopoulos-malakasiotis-androutsopoulos:2017:ALW1</bibkey>
  </paper>

  <paper id="3005">
    <title>Class-based Prediction Errors to Detect Hate Speech with Out-of-vocabulary Words</title>
    <author><first>Joan</first><last>Serr&#224;</last></author>
    <author><first>Ilias</first><last>Leontiadis</last></author>
    <author><first>Dimitris</first><last>Spathis</last></author>
    <author><first>Gianluca</first><last>Stringhini</last></author>
    <author><first>Jeremy</first><last>Blackburn</last></author>
    <author><first>Athena</first><last>Vakali</last></author>
    <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>36&#8211;40</pages>
    <url>http://www.aclweb.org/anthology/W17-3005</url>
    <abstract>Common approaches to text categorization essentially rely either on n-gram
	counts or on word embeddings. This presents important difficulties in highly
	dynamic or quickly-interacting environments, where the appearance of new words
	and/or varied misspellings is the norm. A paradigmatic example of this
	situation is abusive online behavior, with social networks and media platforms
	struggling to effectively combat uncommon or non-blacklisted hate words. To
	better deal with these issues in those fast-paced environments, we propose
	using the error signal of class-based language models as input to text
	classification algorithms. In particular, we train a next-character prediction
	model for any given class and then exploit the error of such class-based models
	to inform a neural network classifier. This way, we shift from the &#8216;ability
	to describe&#8217; seen documents to the &#8216;ability to predict&#8217; unseen content.
	Preliminary studies using out-of-vocabulary splits from abusive tweet data show
	promising results, outperforming competitive text categorization strategies by
	4&#8211;11%.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>serra-EtAl:2017:ALW1</bibkey>
  </paper>

  <paper id="3006">
    <title>One-step and Two-step Classification for Abusive Language Detection on Twitter</title>
    <author><first>Ji Ho</first><last>Park</last></author>
    <author><first>Pascale</first><last>Fung</last></author>
    <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>41&#8211;45</pages>
    <url>http://www.aclweb.org/anthology/W17-3006</url>
    <abstract>Automatic abusive language detection is a difficult but important task for
	online social media. Our research explores a two-step approach of first
	classifying text as abusive and then classifying it into specific types, and
	compares it with a one-step approach of performing a single multi-class
	classification to detect sexist and racist language. On a public English
	Twitter corpus of 20 thousand tweets labeled for sexism and racism, our
	approach shows a promising performance of 0.827 F-measure using HybridCNN in
	one step and 0.824 F-measure using logistic regression in two steps.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>park-fung:2017:ALW1</bibkey>
  </paper>

  <paper id="3007">
    <title>Legal Framework, Dataset and Annotation Schema for Socially Unacceptable Online Discourse Practices in Slovene</title>
    <author><first>Darja</first><last>Fi&#x161;er</last></author>
    <author><first>Toma&#x17E;</first><last>Erjavec</last></author>
    <author><first>Nikola</first><last>Ljube&#x161;i&#x107;</last></author>
    <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>46&#8211;51</pages>
    <url>http://www.aclweb.org/anthology/W17-3007</url>
    <abstract>In this paper we present the legal framework, dataset and annotation schema of
	socially unacceptable discourse practices on social networking platforms in
	Slovenia. On this basis we aim to train an automatic identification and
	classification system with which we wish to contribute towards an improved
	methodology, understanding and treatment of such practices in the contemporary,
	increasingly multicultural information society.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>fivser-erjavec-ljubevsic:2017:ALW1</bibkey>
  </paper>

  <paper id="3008">
    <title>Abusive Language Detection on Arabic Social Media</title>
    <author><first>Hamdy</first><last>Mubarak</last></author>
    <author><first>Kareem</first><last>Darwish</last></author>
    <author><first>Walid</first><last>Magdy</last></author>
    <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>52&#8211;56</pages>
    <url>http://www.aclweb.org/anthology/W17-3008</url>
    <abstract>In this paper, we present our work on detecting abusive language on Arabic
	social media. We extract a list of obscene words and hashtags using common
	patterns used in offensive and rude communications. We also classify Twitter
	users according to whether or not they use any of these words in their tweets.
	We expand the list of obscene words using this classification, and we report
	results on a newly created dataset of Arabic tweets classified as obscene,
	offensive, or clean. We make this dataset freely available for research, in
	addition to the list of obscene words and hashtags. We are also publicly
	releasing a large corpus of classified user comments that were deleted from a
	popular Arabic news site due to violations of the site&#8217;s rules and
	guidelines.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>mubarak-darwish-magdy:2017:ALW1</bibkey>
  </paper>

  <paper id="3009">
    <title>Vectors for Counterspeech on Twitter</title>
    <author><first>Lucas</first><last>Wright</last></author>
    <author><first>Derek</first><last>Ruths</last></author>
    <author><first>Kelly P.</first><last>Dillon</last></author>
    <author><first>Haji Mohammad</first><last>Saleem</last></author>
    <author><first>Susan</first><last>Benesch</last></author>
    <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>57&#8211;62</pages>
    <url>http://www.aclweb.org/anthology/W17-3009</url>
    <abstract>A study of conversations on Twitter found that some arguments between strangers
	led to favorable change in discourse and even in attitudes. The authors propose
	that such exchanges can be usefully distinguished according to whether
	individuals or groups take part on each side, since the opportunity for a
	constructive exchange of views seems to vary accordingly.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>wright-EtAl:2017:ALW1</bibkey>
  </paper>

  <paper id="3010">
    <title>Detecting Nastiness in Social Media</title>
    <author><first>Niloofar</first><last>Safi Samghabadi</last></author>
    <author><first>Suraj</first><last>Maharjan</last></author>
    <author><first>Alan</first><last>Sprague</last></author>
    <author><first>Raquel</first><last>Diaz-Sprague</last></author>
    <author><first>Thamar</first><last>Solorio</last></author>
    <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>63&#8211;72</pages>
    <url>http://www.aclweb.org/anthology/W17-3010</url>
    <abstract>Although social media has made it easy for people to connect on a virtually
	unlimited basis, it has also opened doors to people who misuse it to undermine,
	harass, humiliate, threaten and bully others. There is a lack of adequate
	resources to detect and hinder such behavior. In this paper, we present our
	initial NLP approach to detect invective posts as a first step to eventually
	detect and deter cyberbullying. We crawl data containing profanities and then
	determine whether or not it contains invective. Annotations on this data are
	improved iteratively by in-lab annotations and crowdsourcing. We pursue
	different NLP approaches containing various typical and some newer techniques
	to distinguish the use of swear words in a neutral way from those instances in
	which they are used in an insulting way. We also show that this model not only
	works for our data set, but also can be successfully applied to different data
	sets.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>safisamghabadi-EtAl:2017:ALW1</bibkey>
  </paper>

  <paper id="3011">
    <title>Technology Solutions to Combat Online Harassment</title>
    <author><first>George</first><last>Kennedy</last></author>
    <author><first>Andrew</first><last>McCollough</last></author>
    <author><first>Edward</first><last>Dixon</last></author>
    <author><first>Alexei</first><last>Bastidas</last></author>
    <author><first>John</first><last>Ryan</last></author>
    <author><first>Chris</first><last>Loo</last></author>
    <author><first>Saurav</first><last>Sahay</last></author>
    <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>73&#8211;77</pages>
    <url>http://www.aclweb.org/anthology/W17-3011</url>
    <abstract>This work is part of a new initiative to use machine learning to identify
	online harassment in social media and comment streams. Online harassment goes
	under-reported due to the reliance on humans to identify and report harassment,
	reporting that is further slowed by requirements to fill out forms providing
	context. In addition, the time for moderators to respond and apply human
	judgment can take days, but response times in terms of minutes are needed in
	the online context. Though some of the major social media companies have been
	doing proprietary work in automating the detection of harassment, there are few
	tools available for use by the public. In addition, the amount of labeled
	online harassment data and availability of cross-platform online harassment
	datasets is limited. We present the methodology used to create a harassment
	dataset and classifier and the dataset used to help the system learn what
	harassment looks like.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>kennedy-EtAl:2017:ALW1</bibkey>
  </paper>

  <paper id="3012">
    <title>Understanding Abuse: A Typology of Abusive Language Detection Subtasks</title>
    <author><first>Zeerak</first><last>Waseem</last></author>
    <author><first>Thomas</first><last>Davidson</last></author>
    <author><first>Dana</first><last>Warmsley</last></author>
    <author><first>Ingmar</first><last>Weber</last></author>
    <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>78&#8211;84</pages>
    <url>http://www.aclweb.org/anthology/W17-3012</url>
    <abstract>As the body of research on abusive language detection and analysis grows, there
	is a need for critical consideration of the relationships between different
	subtasks that have been grouped under this label. Based on work on hate speech,
	cyberbullying, and online abuse, we propose a typology that captures central
	similarities and differences between subtasks and discuss the implications of
	this for data annotation and feature construction. We emphasize the practical
	actions that can be taken by researchers to best approach their abusive
	language detection subtask of interest.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>waseem-EtAl:2017:ALW1</bibkey>
  </paper>

  <paper id="3013">
    <title>Using Convolutional Neural Networks to Classify Hate-Speech</title>
    <author><first>Bj&#246;rn</first><last>Gamb&#228;ck</last></author>
    <author><first>Utpal Kumar</first><last>Sikdar</last></author>
    <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>85&#8211;90</pages>
    <url>http://www.aclweb.org/anthology/W17-3013</url>
    <abstract>The paper introduces a deep learning-based Twitter hate-speech text
	classification system. The classifier assigns each tweet to one of four
	predefined categories: racism, sexism, both (racism and sexism) and
	non-hate-speech. Four Convolutional Neural Network models were trained on,
	respectively, character 4-grams, word vectors based on semantic information
	built using word2vec, randomly generated word vectors, and word vectors
	combined with character n-grams. The feature set was down-sized in the networks
	by max-pooling, and a softmax function was used to classify tweets. Tested by
	10-fold cross-validation, the model based on word2vec embeddings performed
	best, with higher precision than recall, and a 78.3% F-score.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>gamback-sikdar:2017:ALW1</bibkey>
  </paper>

  <paper id="3014">
    <title>Illegal is not a Noun: Linguistic Form for Detection of Pejorative Nominalizations</title>
    <author><first>Alexis</first><last>Palmer</last></author>
    <author><first>Melissa</first><last>Robinson</last></author>
    <author><first>Kristy K.</first><last>Phillips</last></author>
    <booktitle>Proceedings of the First Workshop on Abusive Language Online</booktitle>
    <month>August</month>
    <year>2017</year>
    <address>Vancouver, BC, Canada</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>91&#8211;100</pages>
    <url>http://www.aclweb.org/anthology/W17-3014</url>
    <abstract>This paper focuses on a particular type of abusive language, targeting 
	expressions in which typically neutral adjectives take on pejorative meaning
	when used as nouns - compare 'gay people' to 'the gays'. We first collect and
	analyze a corpus of hand-curated, expert-annotated pejorative nominalizations
	for four target adjectives: female, gay, illegal, and poor. We then collect a
	second corpus of automatically-extracted and POS-tagged, crowd-annotated
	tweets. For both corpora, we find support for the hypothesis that some
	adjectives, when nominalized, take on negative meaning. The targeted
	constructions are non-standard yet widely-used, and part-of-speech taggers
	mistag some nominal forms as adjectives. We implement a tool called NomCatcher
	to correct these mistaggings, and find that the same tool is effective for
	identifying new adjectives subject to transformation via nominalization into
	abusive language.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>palmer-robinson-phillips:2017:ALW1</bibkey>
  </paper>

</volume>