@inproceedings{nghiem-daume-iii-2024-hatecot,
title = "{H}ate{COT}: An Explanation-Enhanced Dataset for Generalizable Offensive Speech Detection via Large Language Models",
author = "Nghiem, Huy and
Daum{\'e} Iii, Hal",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.343",
pages = "5938--5956",
abstract = "The widespread use of social media necessitates reliable and efficient detection of offensive content to mitigate harmful effects. Although sophisticated models perform well on individual datasets, they often fail to generalize due to varying definitions and labeling of {``}offensive content.{''} In this paper, we introduce HateCOT, an English dataset with over 52,000 samples from diverse sources, featuring explanations generated by GPT-3.5Turbo and curated by humans. We demonstrate that pretraining on HateCOT significantly enhances the performance of open-source Large Language Models on three benchmark datasets for offensive content detection in both zero-shot and few-shot settings, despite differences in domain and task. Additionally, HateCOT facilitates effective K-shot fine-tuning of LLMs with limited data and improves the quality of their explanations, as confirmed by our human evaluation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nghiem-daume-iii-2024-hatecot">
<titleInfo>
<title>HateCOT: An Explanation-Enhanced Dataset for Generalizable Offensive Speech Detection via Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Huy</namePart>
<namePart type="family">Nghiem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hal</namePart>
<namePart type="family">Daumé Iii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The widespread use of social media necessitates reliable and efficient detection of offensive content to mitigate harmful effects. Although sophisticated models perform well on individual datasets, they often fail to generalize due to varying definitions and labeling of “offensive content.” In this paper, we introduce HateCOT, an English dataset with over 52,000 samples from diverse sources, featuring explanations generated by GPT-3.5Turbo and curated by humans. We demonstrate that pretraining on HateCOT significantly enhances the performance of open-source Large Language Models on three benchmark datasets for offensive content detection in both zero-shot and few-shot settings, despite differences in domain and task. Additionally, HateCOT facilitates effective K-shot fine-tuning of LLMs with limited data and improves the quality of their explanations, as confirmed by our human evaluation.</abstract>
<identifier type="citekey">nghiem-daume-iii-2024-hatecot</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.343</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>5938</start>
<end>5956</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HateCOT: An Explanation-Enhanced Dataset for Generalizable Offensive Speech Detection via Large Language Models
%A Nghiem, Huy
%A Daumé Iii, Hal
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F nghiem-daume-iii-2024-hatecot
%X The widespread use of social media necessitates reliable and efficient detection of offensive content to mitigate harmful effects. Although sophisticated models perform well on individual datasets, they often fail to generalize due to varying definitions and labeling of “offensive content.” In this paper, we introduce HateCOT, an English dataset with over 52,000 samples from diverse sources, featuring explanations generated by GPT-3.5Turbo and curated by humans. We demonstrate that pretraining on HateCOT significantly enhances the performance of open-source Large Language Models on three benchmark datasets for offensive content detection in both zero-shot and few-shot settings, despite differences in domain and task. Additionally, HateCOT facilitates effective K-shot fine-tuning of LLMs with limited data and improves the quality of their explanations, as confirmed by our human evaluation.
%U https://aclanthology.org/2024.findings-emnlp.343
%P 5938-5956
Markdown (Informal)
[HateCOT: An Explanation-Enhanced Dataset for Generalizable Offensive Speech Detection via Large Language Models](https://aclanthology.org/2024.findings-emnlp.343) (Nghiem & Daumé Iii, Findings 2024)
ACL