@inproceedings{srivastava-etal-2018-identifying,
title = "Identifying Aggression and Toxicity in Comments using Capsule Network",
author = "Srivastava, Saurabh and
Khurana, Prerna and
Tewari, Vartika",
editor = "Kumar, Ritesh and
Ojha, Atul Kr. and
Zampieri, Marcos and
Malmasi, Shervin",
booktitle = "Proceedings of the First Workshop on Trolling, Aggression and Cyberbullying ({TRAC}-2018)",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-4412/",
pages = "98--105",
abstract = "Aggression and related activities like trolling, hate speech etc. involve toxic comments in various forms. These are common scenarios in today's time and websites react by shutting down their comment sections. To tackle this, an algorithmic solution is preferred to human moderation which is slow and expensive. In this paper, we propose a single model capsule network with focal loss to achieve this task which is suitable for production environment. Our model achieves competitive results over other strong baseline methods, which show its effectiveness and that focal loss exhibits significant improvement in such cases where class imbalance is a regular issue. Additionally, we show that the problem of extensive data preprocessing, data augmentation can be tackled by capsule networks implicitly. We achieve an overall ROC AUC of 98.46 on Kaggle-toxic comment dataset and show that it beats other architectures by a good margin. As comments tend to be written in more than one language, and transliteration is a common problem, we further show that our model handles this effectively by applying our model on TRAC shared task dataset which contains comments in code-mixed Hindi-English."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="srivastava-etal-2018-identifying">
<titleInfo>
<title>Identifying Aggression and Toxicity in Comments using Capsule Network</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saurabh</namePart>
<namePart type="family">Srivastava</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prerna</namePart>
<namePart type="family">Khurana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vartika</namePart>
<namePart type="family">Tewari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Trolling, Aggression and Cyberbullying (TRAC-2018)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New Mexico, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Aggression and related activities such as trolling and hate speech involve toxic comments in various forms. These have become so common that many websites react by shutting down their comment sections. An algorithmic solution is therefore preferred to human moderation, which is slow and expensive. In this paper, we propose a single-model capsule network with focal loss for this task that is suitable for a production environment. Our model achieves competitive results against other strong baseline methods, which shows its effectiveness, and focal loss yields significant improvement in settings where class imbalance is a regular issue. Additionally, we show that capsule networks implicitly handle the need for extensive data preprocessing and data augmentation. We achieve an overall ROC AUC of 98.46 on the Kaggle toxic comment dataset and show that it beats other architectures by a good margin. As comments tend to be written in more than one language and transliteration is a common problem, we further show that our model handles this effectively by applying it to the TRAC shared task dataset, which contains comments in code-mixed Hindi-English.</abstract>
<identifier type="citekey">srivastava-etal-2018-identifying</identifier>
<location>
<url>https://aclanthology.org/W18-4412/</url>
</location>
<part>
<date>2018-08</date>
<extent unit="page">
<start>98</start>
<end>105</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Identifying Aggression and Toxicity in Comments using Capsule Network
%A Srivastava, Saurabh
%A Khurana, Prerna
%A Tewari, Vartika
%Y Kumar, Ritesh
%Y Ojha, Atul Kr.
%Y Zampieri, Marcos
%Y Malmasi, Shervin
%S Proceedings of the First Workshop on Trolling, Aggression and Cyberbullying (TRAC-2018)
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New Mexico, USA
%F srivastava-etal-2018-identifying
%X Aggression and related activities such as trolling and hate speech involve toxic comments in various forms. These have become so common that many websites react by shutting down their comment sections. An algorithmic solution is therefore preferred to human moderation, which is slow and expensive. In this paper, we propose a single-model capsule network with focal loss for this task that is suitable for a production environment. Our model achieves competitive results against other strong baseline methods, which shows its effectiveness, and focal loss yields significant improvement in settings where class imbalance is a regular issue. Additionally, we show that capsule networks implicitly handle the need for extensive data preprocessing and data augmentation. We achieve an overall ROC AUC of 98.46 on the Kaggle toxic comment dataset and show that it beats other architectures by a good margin. As comments tend to be written in more than one language and transliteration is a common problem, we further show that our model handles this effectively by applying it to the TRAC shared task dataset, which contains comments in code-mixed Hindi-English.
%U https://aclanthology.org/W18-4412/
%P 98-105
Markdown (Informal)
[Identifying Aggression and Toxicity in Comments using Capsule Network](https://aclanthology.org/W18-4412/) (Srivastava et al., TRAC 2018)
ACL
Saurabh Srivastava, Prerna Khurana, and Vartika Tewari. 2018. Identifying Aggression and Toxicity in Comments using Capsule Network. In Proceedings of the First Workshop on Trolling, Aggression and Cyberbullying (TRAC-2018), pages 98–105, Santa Fe, New Mexico, USA. Association for Computational Linguistics.
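
For reference on the abstract's claim about class imbalance: the focal loss used by the paper presumably follows the standard formulation of Lin et al. (2017); the sketch below gives that general definition only, with the weighting factor \alpha_t and focusing parameter \gamma as hyperparameters whose exact values are not stated in this record.

% Standard focal loss (Lin et al., 2017); assumed form for the per-label
% binary classification used in toxic comment tagging.
\[
  \mathrm{FL}(p_t) = -\,\alpha_t\,(1 - p_t)^{\gamma}\,\log(p_t),
  \qquad
  p_t =
  \begin{cases}
    p & \text{if } y = 1,\\
    1 - p & \text{otherwise.}
  \end{cases}
\]
% Relative to plain cross-entropy (-\log p_t), the (1 - p_t)^{\gamma} factor
% down-weights easy, well-classified examples, so the rare toxic classes
% contribute relatively more to the gradient.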