@inproceedings{elsafoury-2022-darkness,
title = "Darkness can not drive out darkness: Investigating Bias in Hate {S}peech{D}etection Models",
author = "Elsafoury, Fatma",
editor = "Louvan, Samuel and
Madotto, Andrea and
Madureira, Brielen",
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.acl-srw.4/",
pages = "31--43",
abstract = "It has become crucial to develop tools for automated hate speech and abuse detection. These tools would help to stop the bullies and the haters and provide a safer environment for individuals especially from marginalized groups to freely express themselves. However, recent research shows that machine learning models are biased and they might make the right decisions for the wrong reasons. In this thesis, I set out to understand the performance of hate speech and abuse detection models and the different biases that could influence them. I show that hate speech and abuse detection models are not only subject to social bias but also to other types of bias that have not been explored before. Finally, I investigate the causal effect of the social and intersectional bias on the performance and unfairness of hate speech detection models."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="elsafoury-2022-darkness">
<titleInfo>
<title>Darkness can not drive out darkness: Investigating Bias in Hate Speech Detection Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Fatma</namePart>
<namePart type="family">Elsafoury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Samuel</namePart>
<namePart type="family">Louvan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrea</namePart>
<namePart type="family">Madotto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brielen</namePart>
<namePart type="family">Madureira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>It has become crucial to develop tools for automated hate speech and abuse detection. These tools would help to stop the bullies and the haters and provide a safer environment for individuals, especially those from marginalized groups, to freely express themselves. However, recent research shows that machine learning models are biased and might make the right decisions for the wrong reasons. In this thesis, I set out to understand the performance of hate speech and abuse detection models and the different biases that could influence them. I show that hate speech and abuse detection models are not only subject to social bias but also to other types of bias that have not been explored before. Finally, I investigate the causal effect of social and intersectional bias on the performance and unfairness of hate speech detection models.</abstract>
<identifier type="citekey">elsafoury-2022-darkness</identifier>
<location>
<url>https://aclanthology.org/2022.acl-srw.4/</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>31</start>
<end>43</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Darkness can not drive out darkness: Investigating Bias in Hate Speech Detection Models
%A Elsafoury, Fatma
%Y Louvan, Samuel
%Y Madotto, Andrea
%Y Madureira, Brielen
%S Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F elsafoury-2022-darkness
%X It has become crucial to develop tools for automated hate speech and abuse detection. These tools would help to stop the bullies and the haters and provide a safer environment for individuals, especially those from marginalized groups, to freely express themselves. However, recent research shows that machine learning models are biased and might make the right decisions for the wrong reasons. In this thesis, I set out to understand the performance of hate speech and abuse detection models and the different biases that could influence them. I show that hate speech and abuse detection models are not only subject to social bias but also to other types of bias that have not been explored before. Finally, I investigate the causal effect of social and intersectional bias on the performance and unfairness of hate speech detection models.
%U https://aclanthology.org/2022.acl-srw.4/
%P 31-43
Markdown (Informal)
[Darkness can not drive out darkness: Investigating Bias in Hate Speech Detection Models](https://aclanthology.org/2022.acl-srw.4/) (Elsafoury, ACL 2022)
ACL
Fatma Elsafoury. 2022. Darkness can not drive out darkness: Investigating Bias in Hate Speech Detection Models. In Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop, pages 31–43, Dublin, Ireland. Association for Computational Linguistics.