@inproceedings{cao-lee-2020-hategan,
title = "{H}ate{GAN}: Adversarial Generative-Based Data Augmentation for Hate Speech Detection",
author = "Cao, Rui and
Lee, Roy Ka-Wei",
editor = "Scott, Donia and
Bel, Nuria and
Zong, Chengqing",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-main.557",
doi = "10.18653/v1/2020.coling-main.557",
pages = "6327--6338",
abstract = "Academia and industry have developed machine learning and natural language processing models to detect online hate speech automatically. However, most of these existing methods adopt a supervised approach that heavily depends on labeled datasets for training. This results in the methods{'} poor detection performance of the hate speech class as the training datasets are highly imbalanced. In this paper, we propose HateGAN, a deep generative reinforcement learning model, which addresses the challenge of imbalance class by augmenting the dataset with hateful tweets. We conduct extensive experiments to augment two commonly-used hate speech detection datasets with the HateGAN generated tweets. Our experiment results show that HateGAN improves the detection performance of the hate speech class regardless of the classifiers and datasets used in the detection task. Specifically, we observe an average 5{\%} improvement for the hate class F1 scores across all state-of-the-art hate speech classifiers. We also conduct case studies to empirically examine the HateGAN generated hate speeches and show that the generated tweets are diverse, coherent, and relevant to hate speech detection.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cao-lee-2020-hategan">
<titleInfo>
<title>HateGAN: Adversarial Generative-Based Data Augmentation for Hate Speech Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roy</namePart>
<namePart type="given">Ka-Wei</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Donia</namePart>
<namePart type="family">Scott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuria</namePart>
<namePart type="family">Bel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Academia and industry have developed machine learning and natural language processing models to detect online hate speech automatically. However, most of these existing methods adopt a supervised approach that heavily depends on labeled datasets for training. This results in the methods’ poor detection performance of the hate speech class as the training datasets are highly imbalanced. In this paper, we propose HateGAN, a deep generative reinforcement learning model, which addresses the challenge of imbalance class by augmenting the dataset with hateful tweets. We conduct extensive experiments to augment two commonly-used hate speech detection datasets with the HateGAN generated tweets. Our experiment results show that HateGAN improves the detection performance of the hate speech class regardless of the classifiers and datasets used in the detection task. Specifically, we observe an average 5% improvement for the hate class F1 scores across all state-of-the-art hate speech classifiers. We also conduct case studies to empirically examine the HateGAN generated hate speeches and show that the generated tweets are diverse, coherent, and relevant to hate speech detection.</abstract>
<identifier type="citekey">cao-lee-2020-hategan</identifier>
<identifier type="doi">10.18653/v1/2020.coling-main.557</identifier>
<location>
<url>https://aclanthology.org/2020.coling-main.557</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>6327</start>
<end>6338</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HateGAN: Adversarial Generative-Based Data Augmentation for Hate Speech Detection
%A Cao, Rui
%A Lee, Roy Ka-Wei
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F cao-lee-2020-hategan
%X Academia and industry have developed machine learning and natural language processing models to detect online hate speech automatically. However, most of these existing methods adopt a supervised approach that heavily depends on labeled datasets for training. This results in the methods’ poor detection performance of the hate speech class as the training datasets are highly imbalanced. In this paper, we propose HateGAN, a deep generative reinforcement learning model, which addresses the challenge of imbalance class by augmenting the dataset with hateful tweets. We conduct extensive experiments to augment two commonly-used hate speech detection datasets with the HateGAN generated tweets. Our experiment results show that HateGAN improves the detection performance of the hate speech class regardless of the classifiers and datasets used in the detection task. Specifically, we observe an average 5% improvement for the hate class F1 scores across all state-of-the-art hate speech classifiers. We also conduct case studies to empirically examine the HateGAN generated hate speeches and show that the generated tweets are diverse, coherent, and relevant to hate speech detection.
%R 10.18653/v1/2020.coling-main.557
%U https://aclanthology.org/2020.coling-main.557
%U https://doi.org/10.18653/v1/2020.coling-main.557
%P 6327-6338
Markdown (Informal)
[HateGAN: Adversarial Generative-Based Data Augmentation for Hate Speech Detection](https://aclanthology.org/2020.coling-main.557) (Cao & Lee, COLING 2020)
ACL