% ACL Anthology W18-4418: system description paper from the TRAC-2018 workshop.
@inproceedings{risch-krestel-2018-aggression,
  author    = {Risch, Julian and
               Krestel, Ralf},
  title     = {Aggression Identification Using Deep Learning and Data Augmentation},
  editor    = {Kumar, Ritesh and
               Ojha, Atul Kr. and
               Zampieri, Marcos and
               Malmasi, Shervin},
  booktitle = {Proceedings of the First Workshop on Trolling, Aggression and Cyberbullying ({TRAC}-2018)},
  year      = {2018},
  month     = aug,
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {150--158},
  url       = {https://aclanthology.org/W18-4418},
  abstract  = {Social media platforms allow users to share and discuss their opinions online. However, a minority of user posts is aggressive, thereby hinders respectful discussion, and {---} at an extreme level {---} is liable to prosecution. The automatic identification of such harmful posts is important, because it can support the costly manual moderation of online discussions. Further, the automation allows unprecedented analyses of discussion datasets that contain millions of posts. This system description paper presents our submission to the First Shared Task on Aggression Identification. We propose to augment the provided dataset to increase the number of labeled comments from 15,000 to 60,000. Thereby, we introduce linguistic variety into the dataset. As a consequence of the larger amount of training data, we are able to train a special deep neural net, which generalizes especially well to unseen data. To further boost the performance, we combine this neural net with three logistic regression classifiers trained on character and word n-grams, and hand-picked syntactic features. This ensemble is more robust than the individual single models. Our team named {``}Julian{''} achieves an F1-score of 60{\%} on both English datasets, 63{\%} on the Hindi Facebook dataset, and 38{\%} on the Hindi Twitter dataset.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey risch-krestel-2018-aggression. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="risch-krestel-2018-aggression">
    <titleInfo>
      <title>Aggression Identification Using Deep Learning and Data Augmentation</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Julian</namePart>
      <namePart type="family">Risch</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ralf</namePart>
      <namePart type="family">Krestel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2018-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher details. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First Workshop on Trolling, Aggression and Cyberbullying (TRAC-2018)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ritesh</namePart>
        <namePart type="family">Kumar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Atul</namePart>
        <namePart type="given">Kr.</namePart>
        <namePart type="family">Ojha</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcos</namePart>
        <namePart type="family">Zampieri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shervin</namePart>
        <namePart type="family">Malmasi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Santa Fe, New Mexico, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Social media platforms allow users to share and discuss their opinions online. However, a minority of user posts is aggressive, thereby hinders respectful discussion, and — at an extreme level — is liable to prosecution. The automatic identification of such harmful posts is important, because it can support the costly manual moderation of online discussions. Further, the automation allows unprecedented analyses of discussion datasets that contain millions of posts. This system description paper presents our submission to the First Shared Task on Aggression Identification. We propose to augment the provided dataset to increase the number of labeled comments from 15,000 to 60,000. Thereby, we introduce linguistic variety into the dataset. As a consequence of the larger amount of training data, we are able to train a special deep neural net, which generalizes especially well to unseen data. To further boost the performance, we combine this neural net with three logistic regression classifiers trained on character and word n-grams, and hand-picked syntactic features. This ensemble is more robust than the individual single models. Our team named “Julian” achieves an F1-score of 60% on both English datasets, 63% on the Hindi Facebook dataset, and 38% on the Hindi Twitter dataset.</abstract>
    <identifier type="citekey">risch-krestel-2018-aggression</identifier>
    <location>
      <url>https://aclanthology.org/W18-4418</url>
    </location>
    <!-- Page range of the paper within the host volume. -->
    <part>
      <date>2018-08</date>
      <extent unit="page">
        <start>150</start>
        <end>158</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Aggression Identification Using Deep Learning and Data Augmentation
%A Risch, Julian
%A Krestel, Ralf
%Y Kumar, Ritesh
%Y Ojha, Atul Kr.
%Y Zampieri, Marcos
%Y Malmasi, Shervin
%S Proceedings of the First Workshop on Trolling, Aggression and Cyberbullying (TRAC-2018)
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New Mexico, USA
%F risch-krestel-2018-aggression
%X Social media platforms allow users to share and discuss their opinions online. However, a minority of user posts is aggressive, thereby hinders respectful discussion, and — at an extreme level — is liable to prosecution. The automatic identification of such harmful posts is important, because it can support the costly manual moderation of online discussions. Further, the automation allows unprecedented analyses of discussion datasets that contain millions of posts. This system description paper presents our submission to the First Shared Task on Aggression Identification. We propose to augment the provided dataset to increase the number of labeled comments from 15,000 to 60,000. Thereby, we introduce linguistic variety into the dataset. As a consequence of the larger amount of training data, we are able to train a special deep neural net, which generalizes especially well to unseen data. To further boost the performance, we combine this neural net with three logistic regression classifiers trained on character and word n-grams, and hand-picked syntactic features. This ensemble is more robust than the individual single models. Our team named “Julian” achieves an F1-score of 60% on both English datasets, 63% on the Hindi Facebook dataset, and 38% on the Hindi Twitter dataset.
%U https://aclanthology.org/W18-4418
%P 150-158
Markdown (Informal)
[Aggression Identification Using Deep Learning and Data Augmentation](https://aclanthology.org/W18-4418) (Risch & Krestel, TRAC 2018)
ACL