@inproceedings{roychowdhury-gupta-2023-data,
title = "Data-Efficient Methods For Improving Hate Speech Detection",
author = "Roychowdhury, Sumegh and
Gupta, Vikram",
editor = "Vlachos, Andreas and
Augenstein, Isabelle",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2023",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-eacl.9/",
doi = "10.18653/v1/2023.findings-eacl.9",
pages = "125--132",
abstract = "Scarcity of large-scale datasets, especially for resource-impoverished languages motivates exploration of data-efficient methods for hate speech detection. Hateful intents are expressed explicitly (use of cuss, swear, abusive words) and implicitly (indirect and contextual). In this work, we progress implicit and explicit hate speech detection using an input-level data augmentation technique, task reformulation using entailment and cross-learning across five languages. Our proposed data augmentation technique EasyMix, improves the performance across all english datasets by {\textasciitilde}1{\%} and across multilingual datasets by {\textasciitilde}1-9{\%}. We also observe substantial gains of {\textasciitilde}2-8{\%} by reformulating hate speech detection as entail problem. We further probe the contextual models and observe that higher layers encode implicit hate while lower layers focus on explicit hate, highlighting the importance of token-level understanding for explicit and context-level for implicit hate speech detection. Code and Dataset splits - \url{https://anonymous.4open.science/r/data_efficient_hatedetect/}"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="roychowdhury-gupta-2023-data">
<titleInfo>
<title>Data-Efficient Methods For Improving Hate Speech Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sumegh</namePart>
<namePart type="family">Roychowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vikram</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Scarcity of large-scale datasets, especially for resource-impoverished languages motivates exploration of data-efficient methods for hate speech detection. Hateful intents are expressed explicitly (use of cuss, swear, abusive words) and implicitly (indirect and contextual). In this work, we progress implicit and explicit hate speech detection using an input-level data augmentation technique, task reformulation using entailment and cross-learning across five languages. Our proposed data augmentation technique EasyMix, improves the performance across all english datasets by ~1% and across multilingual datasets by ~1-9%. We also observe substantial gains of ~2-8% by reformulating hate speech detection as entail problem. We further probe the contextual models and observe that higher layers encode implicit hate while lower layers focus on explicit hate, highlighting the importance of token-level understanding for explicit and context-level for implicit hate speech detection. Code and Dataset splits - https://anonymous.4open.science/r/data_efficient_hatedetect/</abstract>
<identifier type="citekey">roychowdhury-gupta-2023-data</identifier>
<identifier type="doi">10.18653/v1/2023.findings-eacl.9</identifier>
<location>
<url>https://aclanthology.org/2023.findings-eacl.9/</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>125</start>
<end>132</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data-Efficient Methods For Improving Hate Speech Detection
%A Roychowdhury, Sumegh
%A Gupta, Vikram
%Y Vlachos, Andreas
%Y Augenstein, Isabelle
%S Findings of the Association for Computational Linguistics: EACL 2023
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F roychowdhury-gupta-2023-data
%X Scarcity of large-scale datasets, especially for resource-impoverished languages motivates exploration of data-efficient methods for hate speech detection. Hateful intents are expressed explicitly (use of cuss, swear, abusive words) and implicitly (indirect and contextual). In this work, we progress implicit and explicit hate speech detection using an input-level data augmentation technique, task reformulation using entailment and cross-learning across five languages. Our proposed data augmentation technique EasyMix, improves the performance across all english datasets by ~1% and across multilingual datasets by ~1-9%. We also observe substantial gains of ~2-8% by reformulating hate speech detection as entail problem. We further probe the contextual models and observe that higher layers encode implicit hate while lower layers focus on explicit hate, highlighting the importance of token-level understanding for explicit and context-level for implicit hate speech detection. Code and Dataset splits - https://anonymous.4open.science/r/data_efficient_hatedetect/
%R 10.18653/v1/2023.findings-eacl.9
%U https://aclanthology.org/2023.findings-eacl.9/
%U https://doi.org/10.18653/v1/2023.findings-eacl.9
%P 125-132
Markdown (Informal)
[Data-Efficient Methods For Improving Hate Speech Detection](https://aclanthology.org/2023.findings-eacl.9/) (Roychowdhury & Gupta, Findings 2023)
ACL