@inproceedings{zueva-etal-2020-reducing,
title = "Reducing Unintended Identity Bias in {R}ussian Hate Speech Detection",
author = "Zueva, Nadezhda and
Kabirova, Madina and
Kalaidin, Pavel",
booktitle = "Proceedings of the Fourth Workshop on Online Abuse and Harms",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.alw-1.8",
doi = "10.18653/v1/2020.alw-1.8",
pages = "65--69",
abstract = "Toxicity has become a grave problem for many online communities, and has been growing across many languages, including Russian. Hate speech creates an environment of intimidation, discrimination, and may even incite some real-world violence. Both researchers and social platforms have been focused on developing models to detect toxicity in online communication for a while now. A common problem of these models is the presence of bias towards some words (e.g. woman, black, jew or женщина, черный, еврей) that are not toxic, but serve as triggers for the classifier due to model caveats. In this paper, we describe our efforts towards classifying hate speech in Russian, and propose simple techniques of reducing unintended bias, such as generating training data with language models using terms and words related to protected identities as context and applying word dropout to such words.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zueva-etal-2020-reducing">
    <titleInfo>
      <title>Reducing Unintended Identity Bias in Russian Hate Speech Detection</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Nadezhda</namePart>
      <namePart type="family">Zueva</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Madina</namePart>
      <namePart type="family">Kabirova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pavel</namePart>
      <namePart type="family">Kalaidin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Fourth Workshop on Online Abuse and Harms</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Toxicity has become a grave problem for many online communities, and has been growing across many languages, including Russian. Hate speech creates an environment of intimidation, discrimination, and may even incite some real-world violence. Both researchers and social platforms have been focused on developing models to detect toxicity in online communication for a while now. A common problem of these models is the presence of bias towards some words (e.g. woman, black, jew or женщина, черный, еврей) that are not toxic, but serve as triggers for the classifier due to model caveats. In this paper, we describe our efforts towards classifying hate speech in Russian, and propose simple techniques of reducing unintended bias, such as generating training data with language models using terms and words related to protected identities as context and applying word dropout to such words.</abstract>
    <identifier type="citekey">zueva-etal-2020-reducing</identifier>
    <identifier type="doi">10.18653/v1/2020.alw-1.8</identifier>
    <location>
      <url>https://aclanthology.org/2020.alw-1.8</url>
    </location>
    <part>
      <date>2020-11</date>
      <extent unit="page">
        <start>65</start>
        <end>69</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Reducing Unintended Identity Bias in Russian Hate Speech Detection
%A Zueva, Nadezhda
%A Kabirova, Madina
%A Kalaidin, Pavel
%S Proceedings of the Fourth Workshop on Online Abuse and Harms
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F zueva-etal-2020-reducing
%X Toxicity has become a grave problem for many online communities, and has been growing across many languages, including Russian. Hate speech creates an environment of intimidation, discrimination, and may even incite some real-world violence. Both researchers and social platforms have been focused on developing models to detect toxicity in online communication for a while now. A common problem of these models is the presence of bias towards some words (e.g. woman, black, jew or женщина, черный, еврей) that are not toxic, but serve as triggers for the classifier due to model caveats. In this paper, we describe our efforts towards classifying hate speech in Russian, and propose simple techniques of reducing unintended bias, such as generating training data with language models using terms and words related to protected identities as context and applying word dropout to such words.
%R 10.18653/v1/2020.alw-1.8
%U https://aclanthology.org/2020.alw-1.8
%U https://doi.org/10.18653/v1/2020.alw-1.8
%P 65-69
Markdown (Informal)
[Reducing Unintended Identity Bias in Russian Hate Speech Detection](https://aclanthology.org/2020.alw-1.8) (Zueva et al., ALW 2020)
ACL
Nadezhda Zueva, Madina Kabirova, and Pavel Kalaidin. 2020. [Reducing Unintended Identity Bias in Russian Hate Speech Detection](https://aclanthology.org/2020.alw-1.8). In *Proceedings of the Fourth Workshop on Online Abuse and Harms*, pages 65–69, Online. Association for Computational Linguistics.
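
As a rough illustration of the word dropout technique mentioned in the abstract, here is a minimal sketch in Python. The identity-term list (taken from the abstract's examples), the dropout probability, the `<unk>` placeholder, and the function name are all assumptions for illustration, not details from the paper.

```python
import random

# Identity terms drawn from the abstract's examples; a real system
# would use a curated list of protected-identity terms (assumption).
IDENTITY_TERMS = {"woman", "black", "jew", "женщина", "черный", "еврей"}

def identity_word_dropout(tokens, p=0.5, unk="<unk>"):
    """Randomly mask identity terms in a tokenized training example so
    the classifier cannot rely on them alone as toxicity triggers.
    The rate p and the <unk> placeholder are hypothetical choices."""
    return [unk if tok.lower() in IDENTITY_TERMS and random.random() < p
            else tok
            for tok in tokens]

# Example: the identity term may be masked; the rest of the text is kept.
print(identity_word_dropout("this woman wrote a comment".split()))
```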