@inproceedings{madukwe-etal-2020-data,
title = "In Data We Trust: A Critical Analysis of Hate Speech Detection Datasets",
author = "Madukwe, Kosisochukwu and
Gao, Xiaoying and
Xue, Bing",
editor = "Akiwowo, Seyi and
Vidgen, Bertie and
Prabhakaran, Vinodkumar and
Waseem, Zeerak",
booktitle = "Proceedings of the Fourth Workshop on Online Abuse and Harms",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.alw-1.18",
doi = "10.18653/v1/2020.alw-1.18",
pages = "150--161",
abstract = "Recently, a few studies have discussed the limitations of datasets collected for the task of detecting hate speech from different viewpoints. We intend to contribute to the conversation by providing a consolidated overview of these issues pertaining to the data that debilitate research in this area. Specifically, we discuss how the varying pre-processing steps and the format for making data publicly available result in highly varying datasets that make an objective comparison between studies difficult and unfair. There is currently no study (to the best of our knowledge) focused on comparing the attributes of existing datasets for hate speech detection, outlining their limitations and recommending approaches for future research. This work intends to fill that gap and become the one-stop shop for information regarding hate speech datasets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="madukwe-etal-2020-data">
<titleInfo>
<title>In Data We Trust: A Critical Analysis of Hate Speech Detection Datasets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kosisochukwu</namePart>
<namePart type="family">Madukwe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaoying</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bing</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Online Abuse and Harms</title>
</titleInfo>
<name type="personal">
<namePart type="given">Seyi</namePart>
<namePart type="family">Akiwowo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bertie</namePart>
<namePart type="family">Vidgen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vinodkumar</namePart>
<namePart type="family">Prabhakaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeerak</namePart>
<namePart type="family">Waseem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recently, a few studies have discussed the limitations of datasets collected for the task of detecting hate speech from different viewpoints. We intend to contribute to the conversation by providing a consolidated overview of these issues pertaining to the data that debilitate research in this area. Specifically, we discuss how the varying pre-processing steps and the format for making data publicly available result in highly varying datasets that make an objective comparison between studies difficult and unfair. There is currently no study (to the best of our knowledge) focused on comparing the attributes of existing datasets for hate speech detection, outlining their limitations and recommending approaches for future research. This work intends to fill that gap and become the one-stop shop for information regarding hate speech datasets.</abstract>
<identifier type="citekey">madukwe-etal-2020-data</identifier>
<identifier type="doi">10.18653/v1/2020.alw-1.18</identifier>
<location>
<url>https://aclanthology.org/2020.alw-1.18</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>150</start>
<end>161</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T In Data We Trust: A Critical Analysis of Hate Speech Detection Datasets
%A Madukwe, Kosisochukwu
%A Gao, Xiaoying
%A Xue, Bing
%Y Akiwowo, Seyi
%Y Vidgen, Bertie
%Y Prabhakaran, Vinodkumar
%Y Waseem, Zeerak
%S Proceedings of the Fourth Workshop on Online Abuse and Harms
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F madukwe-etal-2020-data
%X Recently, a few studies have discussed the limitations of datasets collected for the task of detecting hate speech from different viewpoints. We intend to contribute to the conversation by providing a consolidated overview of these issues pertaining to the data that debilitate research in this area. Specifically, we discuss how the varying pre-processing steps and the format for making data publicly available result in highly varying datasets that make an objective comparison between studies difficult and unfair. There is currently no study (to the best of our knowledge) focused on comparing the attributes of existing datasets for hate speech detection, outlining their limitations and recommending approaches for future research. This work intends to fill that gap and become the one-stop shop for information regarding hate speech datasets.
%R 10.18653/v1/2020.alw-1.18
%U https://aclanthology.org/2020.alw-1.18
%U https://doi.org/10.18653/v1/2020.alw-1.18
%P 150-161
Markdown (Informal)
[In Data We Trust: A Critical Analysis of Hate Speech Detection Datasets](https://aclanthology.org/2020.alw-1.18) (Madukwe et al., ALW 2020)
ACL