% Workshop paper record; url and doi both point at ACL Anthology item 2023.c3nlp-1.5.
% Acronyms in booktitle are brace-protected so that case-changing styles keep them upper-case.
% NOTE(review): address presumably holds the workshop venue rather than the publisher's city -- confirm.
@inproceedings{lee-etal-2023-hate,
  title     = {Hate Speech Classifiers are Culturally Insensitive},
  author    = {Lee, Nayeon and Jung, Chani and Oh, Alice},
  editor    = {Dev, Sunipa and Prabhakaran, Vinodkumar and Adelani, David and Hovy, Dirk and Benotti, Luciana},
  booktitle = {Proceedings of the First Workshop on Cross-Cultural Considerations in {NLP} ({C3NLP})},
  month     = may,
  year      = {2023},
  address   = {Dubrovnik, Croatia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.c3nlp-1.5},
  doi       = {10.18653/v1/2023.c3nlp-1.5},
  pages     = {35--46},
  abstract  = {Increasingly, language models and machine translation are becoming valuable tools to help people communicate with others from diverse cultural backgrounds. However, current language models lack cultural awareness because they are trained on data representing only the culture within the dataset. This presents a problem in the context of hate speech classification, where cultural awareness is especially critical. This study aims to quantify the cultural insensitivity of three monolingual (Korean, English, Arabic) hate speech classifiers by evaluating their performance on translated datasets from the other two languages. Our research has revealed that hate speech classifiers evaluated on datasets from other cultures yield significantly lower F1 scores, up to almost 50{\%}. In addition, they produce considerably higher false negative rates, with a magnitude up to five times greater, demonstrating the extent of the cultural gap. The study highlights the severity of cultural insensitivity of language models in hate speech classification.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey lee-etal-2023-hate: the top-level names are the
     authors; the host relatedItem carries the proceedings title, its editors,
     publisher and place. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="lee-etal-2023-hate">
    <titleInfo>
      <title>Hate Speech Classifiers are Culturally Insensitive</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Nayeon</namePart>
      <namePart type="family">Lee</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chani</namePart>
      <namePart type="family">Jung</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alice</namePart>
      <namePart type="family">Oh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First Workshop on Cross-Cultural Considerations in NLP (C3NLP)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sunipa</namePart>
        <namePart type="family">Dev</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vinodkumar</namePart>
        <namePart type="family">Prabhakaran</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Adelani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dirk</namePart>
        <namePart type="family">Hovy</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Luciana</namePart>
        <namePart type="family">Benotti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dubrovnik, Croatia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Increasingly, language models and machine translation are becoming valuable tools to help people communicate with others from diverse cultural backgrounds. However, current language models lack cultural awareness because they are trained on data representing only the culture within the dataset. This presents a problem in the context of hate speech classification, where cultural awareness is especially critical. This study aims to quantify the cultural insensitivity of three monolingual (Korean, English, Arabic) hate speech classifiers by evaluating their performance on translated datasets from the other two languages. Our research has revealed that hate speech classifiers evaluated on datasets from other cultures yield significantly lower F1 scores, up to almost 50%. In addition, they produce considerably higher false negative rates, with a magnitude up to five times greater, demonstrating the extent of the cultural gap. The study highlights the severity of cultural insensitivity of language models in hate speech classification.</abstract>
    <identifier type="citekey">lee-etal-2023-hate</identifier>
    <identifier type="doi">10.18653/v1/2023.c3nlp-1.5</identifier>
    <location>
      <url>https://aclanthology.org/2023.c3nlp-1.5</url>
    </location>
    <part>
      <date>2023-05</date>
      <extent unit="page">
        <start>35</start>
        <end>46</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Hate Speech Classifiers are Culturally Insensitive
%A Lee, Nayeon
%A Jung, Chani
%A Oh, Alice
%Y Dev, Sunipa
%Y Prabhakaran, Vinodkumar
%Y Adelani, David
%Y Hovy, Dirk
%Y Benotti, Luciana
%S Proceedings of the First Workshop on Cross-Cultural Considerations in NLP (C3NLP)
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F lee-etal-2023-hate
%X Increasingly, language models and machine translation are becoming valuable tools to help people communicate with others from diverse cultural backgrounds. However, current language models lack cultural awareness because they are trained on data representing only the culture within the dataset. This presents a problem in the context of hate speech classification, where cultural awareness is especially critical. This study aims to quantify the cultural insensitivity of three monolingual (Korean, English, Arabic) hate speech classifiers by evaluating their performance on translated datasets from the other two languages. Our research has revealed that hate speech classifiers evaluated on datasets from other cultures yield significantly lower F1 scores, up to almost 50%. In addition, they produce considerably higher false negative rates, with a magnitude up to five times greater, demonstrating the extent of the cultural gap. The study highlights the severity of cultural insensitivity of language models in hate speech classification.
%R 10.18653/v1/2023.c3nlp-1.5
%U https://aclanthology.org/2023.c3nlp-1.5
%U https://doi.org/10.18653/v1/2023.c3nlp-1.5
%P 35-46
Markdown (Informal)
[Hate Speech Classifiers are Culturally Insensitive](https://aclanthology.org/2023.c3nlp-1.5) (Lee et al., C3NLP 2023)
ACL
- Nayeon Lee, Chani Jung, and Alice Oh. 2023. Hate Speech Classifiers are Culturally Insensitive. In Proceedings of the First Workshop on Cross-Cultural Considerations in NLP (C3NLP), pages 35–46, Dubrovnik, Croatia. Association for Computational Linguistics.