@inproceedings{jin-etal-2023-towards,
title = "Towards Weakly-Supervised Hate Speech Classification Across Datasets",
author = "Jin, Yiping and
Wanner, Leo and
Kadam, Vishakha and
Shvets, Alexander",
editor = {Chung, Yi-ling and
R{\"o}ttger, Paul and
Nozza, Debora and
Talat, Zeerak and
Mostafazadeh Davani, Aida},
booktitle = "The 7th Workshop on Online Abuse and Harms (WOAH)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.woah-1.4",
doi = "10.18653/v1/2023.woah-1.4",
pages = "42--59",
abstract = "As pointed out by several scholars, current research on hate speech (HS) recognition is characterized by unsystematic data creation strategies and diverging annotation schemata. Subsequently, supervised-learning models tend to generalize poorly to datasets they were not trained on, and the performance of the models trained on datasets labeled using different HS taxonomies cannot be compared. To ease this problem, we propose applying extremely weak supervision that only relies on the class name rather than on class samples from the annotated data. We demonstrate the effectiveness of a state-of-the-art weakly-supervised text classification model in various in-dataset and cross-dataset settings. Furthermore, we conduct an in-depth quantitative and qualitative analysis of the source of poor generalizability of HS classification models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jin-etal-2023-towards">
<titleInfo>
<title>Towards Weakly-Supervised Hate Speech Classification Across Datasets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yiping</namePart>
<namePart type="family">Jin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vishakha</namePart>
<namePart type="family">Kadam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Shvets</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>The 7th Workshop on Online Abuse and Harms (WOAH)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yi-ling</namePart>
<namePart type="family">Chung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Röttger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debora</namePart>
<namePart type="family">Nozza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeerak</namePart>
<namePart type="family">Talat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aida</namePart>
<namePart type="family">Mostafazadeh Davani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>As pointed out by several scholars, current research on hate speech (HS) recognition is characterized by unsystematic data creation strategies and diverging annotation schemata. Subsequently, supervised-learning models tend to generalize poorly to datasets they were not trained on, and the performance of the models trained on datasets labeled using different HS taxonomies cannot be compared. To ease this problem, we propose applying extremely weak supervision that only relies on the class name rather than on class samples from the annotated data. We demonstrate the effectiveness of a state-of-the-art weakly-supervised text classification model in various in-dataset and cross-dataset settings. Furthermore, we conduct an in-depth quantitative and qualitative analysis of the source of poor generalizability of HS classification models.</abstract>
<identifier type="citekey">jin-etal-2023-towards</identifier>
<identifier type="doi">10.18653/v1/2023.woah-1.4</identifier>
<location>
<url>https://aclanthology.org/2023.woah-1.4</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>42</start>
<end>59</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Weakly-Supervised Hate Speech Classification Across Datasets
%A Jin, Yiping
%A Wanner, Leo
%A Kadam, Vishakha
%A Shvets, Alexander
%Y Chung, Yi-ling
%Y Röttger, Paul
%Y Nozza, Debora
%Y Talat, Zeerak
%Y Mostafazadeh Davani, Aida
%S The 7th Workshop on Online Abuse and Harms (WOAH)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F jin-etal-2023-towards
%X As pointed out by several scholars, current research on hate speech (HS) recognition is characterized by unsystematic data creation strategies and diverging annotation schemata. Subsequently, supervised-learning models tend to generalize poorly to datasets they were not trained on, and the performance of the models trained on datasets labeled using different HS taxonomies cannot be compared. To ease this problem, we propose applying extremely weak supervision that only relies on the class name rather than on class samples from the annotated data. We demonstrate the effectiveness of a state-of-the-art weakly-supervised text classification model in various in-dataset and cross-dataset settings. Furthermore, we conduct an in-depth quantitative and qualitative analysis of the source of poor generalizability of HS classification models.
%R 10.18653/v1/2023.woah-1.4
%U https://aclanthology.org/2023.woah-1.4
%U https://doi.org/10.18653/v1/2023.woah-1.4
%P 42-59
Markdown (Informal)
[Towards Weakly-Supervised Hate Speech Classification Across Datasets](https://aclanthology.org/2023.woah-1.4) (Jin et al., WOAH 2023)
ACL