@inproceedings{xia-etal-2020-demoting,
    title     = {Demoting Racial Bias in Hate Speech Detection},
    author    = {Xia, Mengzhou and
                 Field, Anjalie and
                 Tsvetkov, Yulia},
    editor    = {Ku, Lun-Wei and
                 Li, Cheng-Te},
    booktitle = {Proceedings of the Eighth International Workshop on Natural Language Processing for Social Media},
    month     = jul,
    year      = {2020},
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2020.socialnlp-1.2},
    doi       = {10.18653/v1/2020.socialnlp-1.2},
    pages     = {7--14},
    abstract  = {In the task of hate speech detection, there exists a high correlation between African American English ({AAE}) and annotators' perceptions of toxicity in current datasets. This bias in annotated training data and the tendency of machine learning models to amplify it cause {AAE} text to often be mislabeled as abusive/offensive/hate speech (high false positive rate) by current hate speech classifiers. Here, we use adversarial training to mitigate this bias. Experimental results on one hate speech dataset and one {AAE} dataset suggest that our method is able to reduce the false positive rate for {AAE} text with only a minimal compromise on the performance of hate speech classification.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xia-etal-2020-demoting">
<titleInfo>
<title>Demoting Racial Bias in Hate Speech Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mengzhou</namePart>
<namePart type="family">Xia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anjalie</namePart>
<namePart type="family">Field</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulia</namePart>
<namePart type="family">Tsvetkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Workshop on Natural Language Processing for Social Media</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng-Te</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the task of hate speech detection, there exists a high correlation between African American English (AAE) and annotators’ perceptions of toxicity in current datasets. This bias in annotated training data and the tendency of machine learning models to amplify it cause AAE text to often be mislabeled as abusive/offensive/hate speech (high false positive rate) by current hate speech classifiers. Here, we use adversarial training to mitigate this bias. Experimental results on one hate speech dataset and one AAE dataset suggest that our method is able to reduce the false positive rate for AAE text with only a minimal compromise on the performance of hate speech classification.</abstract>
<identifier type="citekey">xia-etal-2020-demoting</identifier>
<identifier type="doi">10.18653/v1/2020.socialnlp-1.2</identifier>
<location>
<url>https://aclanthology.org/2020.socialnlp-1.2</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>7</start>
<end>14</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Demoting Racial Bias in Hate Speech Detection
%A Xia, Mengzhou
%A Field, Anjalie
%A Tsvetkov, Yulia
%Y Ku, Lun-Wei
%Y Li, Cheng-Te
%S Proceedings of the Eighth International Workshop on Natural Language Processing for Social Media
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F xia-etal-2020-demoting
%X In the task of hate speech detection, there exists a high correlation between African American English (AAE) and annotators’ perceptions of toxicity in current datasets. This bias in annotated training data and the tendency of machine learning models to amplify it cause AAE text to often be mislabeled as abusive/offensive/hate speech (high false positive rate) by current hate speech classifiers. Here, we use adversarial training to mitigate this bias. Experimental results on one hate speech dataset and one AAE dataset suggest that our method is able to reduce the false positive rate for AAE text with only a minimal compromise on the performance of hate speech classification.
%R 10.18653/v1/2020.socialnlp-1.2
%U https://aclanthology.org/2020.socialnlp-1.2
%U https://doi.org/10.18653/v1/2020.socialnlp-1.2
%P 7-14
Markdown (Informal)
[Demoting Racial Bias in Hate Speech Detection](https://aclanthology.org/2020.socialnlp-1.2) (Xia et al., SocialNLP 2020)
ACL
- Mengzhou Xia, Anjalie Field, and Yulia Tsvetkov. 2020. Demoting Racial Bias in Hate Speech Detection. In Proceedings of the Eighth International Workshop on Natural Language Processing for Social Media, pages 7–14, Online. Association for Computational Linguistics.