@inproceedings{sap-etal-2019-risk,
    title = "The Risk of Racial Bias in Hate Speech Detection",
    author = "Sap, Maarten  and
      Card, Dallas  and
      Gabriel, Saadia  and
      Choi, Yejin  and
      Smith, Noah A.",
    editor = "Korhonen, Anna  and
      Traum, David  and
      M{\`a}rquez, Llu{\'i}s",
    booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
    month = jul,
    year = "2019",
    address = "Florence, Italy",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P19-1163/",
    doi = "10.18653/v1/P19-1163",
    pages = "1668--1678",
    abstract = "We investigate how annotators' insensitivity to differences in dialect can lead to racial bias in automatic hate speech detection models, potentially amplifying harm against minority populations. We first uncover unexpected correlations between surface markers of African American English (AAE) and ratings of toxicity in several widely-used hate speech datasets. Then, we show that models trained on these corpora acquire and propagate these biases, such that AAE tweets and tweets by self-identified African Americans are up to two times more likely to be labelled as offensive compared to others. Finally, we propose *dialect* and *race priming* as ways to reduce the racial bias in annotation, showing that when annotators are made explicitly aware of an AAE tweet{'}s dialect they are significantly less likely to label the tweet as offensive."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sap-etal-2019-risk">
    <titleInfo>
        <title>The Risk of Racial Bias in Hate Speech Detection</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Maarten</namePart>
        <namePart type="family">Sap</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Dallas</namePart>
        <namePart type="family">Card</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Saadia</namePart>
        <namePart type="family">Gabriel</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Yejin</namePart>
        <namePart type="family">Choi</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Noah</namePart>
        <namePart type="given">A</namePart>
        <namePart type="family">Smith</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2019-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Anna</namePart>
            <namePart type="family">Korhonen</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">David</namePart>
            <namePart type="family">Traum</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Lluís</namePart>
            <namePart type="family">Màrquez</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Florence, Italy</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We investigate how annotators’ insensitivity to differences in dialect can lead to racial bias in automatic hate speech detection models, potentially amplifying harm against minority populations. We first uncover unexpected correlations between surface markers of African American English (AAE) and ratings of toxicity in several widely-used hate speech datasets. Then, we show that models trained on these corpora acquire and propagate these biases, such that AAE tweets and tweets by self-identified African Americans are up to two times more likely to be labelled as offensive compared to others. Finally, we propose *dialect* and *race priming* as ways to reduce the racial bias in annotation, showing that when annotators are made explicitly aware of an AAE tweet’s dialect they are significantly less likely to label the tweet as offensive.</abstract>
    <identifier type="citekey">sap-etal-2019-risk</identifier>
    <identifier type="doi">10.18653/v1/P19-1163</identifier>
    <location>
        <url>https://aclanthology.org/P19-1163/</url>
    </location>
    <part>
        <date>2019-07</date>
        <extent unit="page">
            <start>1668</start>
            <end>1678</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Risk of Racial Bias in Hate Speech Detection
%A Sap, Maarten
%A Card, Dallas
%A Gabriel, Saadia
%A Choi, Yejin
%A Smith, Noah A.
%Y Korhonen, Anna
%Y Traum, David
%Y Màrquez, Lluís
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F sap-etal-2019-risk
%X We investigate how annotators’ insensitivity to differences in dialect can lead to racial bias in automatic hate speech detection models, potentially amplifying harm against minority populations. We first uncover unexpected correlations between surface markers of African American English (AAE) and ratings of toxicity in several widely-used hate speech datasets. Then, we show that models trained on these corpora acquire and propagate these biases, such that AAE tweets and tweets by self-identified African Americans are up to two times more likely to be labelled as offensive compared to others. Finally, we propose *dialect* and *race priming* as ways to reduce the racial bias in annotation, showing that when annotators are made explicitly aware of an AAE tweet’s dialect they are significantly less likely to label the tweet as offensive.
%R 10.18653/v1/P19-1163
%U https://aclanthology.org/P19-1163/
%U https://doi.org/10.18653/v1/P19-1163
%P 1668-1678
Markdown (Informal)
[The Risk of Racial Bias in Hate Speech Detection](https://aclanthology.org/P19-1163/) (Sap et al., ACL 2019)
ACL
- Maarten Sap, Dallas Card, Saadia Gabriel, Yejin Choi, and Noah A. Smith. 2019. The Risk of Racial Bias in Hate Speech Detection. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pages 1668–1678, Florence, Italy. Association for Computational Linguistics.