@inproceedings{das-balke-2024-toximatics,
title = "Toximatics: Towards Understanding Toxicity in Real-Life Social Situations",
author = "Das, Mayukh and
Balke, Wolf-Tilo",
editor = "Kawahara, Tatsuya and
Demberg, Vera and
Ultes, Stefan and
Inoue, Koji and
Mehri, Shikib and
Howcroft, David and
Komatani, Kazunori",
booktitle = "Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue",
month = sep,
year = "2024",
address = "Kyoto, Japan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.sigdial-1.65",
doi = "10.18653/v1/2024.sigdial-1.65",
pages = "770--785",
abstract = "The proliferation of social media has increased the visibility and effects of hate speech. To address this, NLP solutions have been developed to identify both explicit and implicit forms of hate speech. Typically, these approaches evaluate the toxicity of utterances in isolation, ignoring the context. Drawing on pragmatics, our study examines how contextual factors can influence the perceived toxicity of utterances, thereby anchoring assessments in a more nuanced semantic framework. We present Toximatics, a dataset that includes context-dependent utterances and it{'}s toxicity score. We also introduce a novel synthetic data generation pipeline designed to create context-utterance pairs at scale with controlled polarity. This pipeline can enhance existing hate speech datasets by adding contextual information to utterances, either preserving or altering their polarity, and also generate completely new pairs from seed statements. We utilised both features to create Toximatics. To address biases in state-of-the-art hate datasets, which often skew towards specific sensitive topics such as politics, race, and gender, we propose a method to generate neutral utterances typical of various social settings. These are then contextualized to show how neutrality can shift to toxicity or benignity depending on the surrounding context. The evaluation results clearly indicate that the current models are underperforming on this dataset.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="das-balke-2024-toximatics">
<titleInfo>
<title>Toximatics: Towards Understanding Toxicity in Real-Life Social Situations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mayukh</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wolf-Tilo</namePart>
<namePart type="family">Balke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tatsuya</namePart>
<namePart type="family">Kawahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Ultes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koji</namePart>
<namePart type="family">Inoue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shikib</namePart>
<namePart type="family">Mehri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Howcroft</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kazunori</namePart>
<namePart type="family">Komatani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Kyoto, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The proliferation of social media has increased the visibility and effects of hate speech. To address this, NLP solutions have been developed to identify both explicit and implicit forms of hate speech. Typically, these approaches evaluate the toxicity of utterances in isolation, ignoring the context. Drawing on pragmatics, our study examines how contextual factors can influence the perceived toxicity of utterances, thereby anchoring assessments in a more nuanced semantic framework. We present Toximatics, a dataset that includes context-dependent utterances and their toxicity scores. We also introduce a novel synthetic data generation pipeline designed to create context-utterance pairs at scale with controlled polarity. This pipeline can enhance existing hate speech datasets by adding contextual information to utterances, either preserving or altering their polarity, and can also generate completely new pairs from seed statements. We utilised both features to create Toximatics. To address biases in state-of-the-art hate speech datasets, which often skew towards specific sensitive topics such as politics, race, and gender, we propose a method to generate neutral utterances typical of various social settings. These are then contextualized to show how neutrality can shift to toxicity or benignity depending on the surrounding context. The evaluation results clearly indicate that current models underperform on this dataset.</abstract>
<identifier type="citekey">das-balke-2024-toximatics</identifier>
<identifier type="doi">10.18653/v1/2024.sigdial-1.65</identifier>
<location>
<url>https://aclanthology.org/2024.sigdial-1.65</url>
</location>
<part>
<date>2024-09</date>
<extent unit="page">
<start>770</start>
<end>785</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Toximatics: Towards Understanding Toxicity in Real-Life Social Situations
%A Das, Mayukh
%A Balke, Wolf-Tilo
%Y Kawahara, Tatsuya
%Y Demberg, Vera
%Y Ultes, Stefan
%Y Inoue, Koji
%Y Mehri, Shikib
%Y Howcroft, David
%Y Komatani, Kazunori
%S Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2024
%8 September
%I Association for Computational Linguistics
%C Kyoto, Japan
%F das-balke-2024-toximatics
%X The proliferation of social media has increased the visibility and effects of hate speech. To address this, NLP solutions have been developed to identify both explicit and implicit forms of hate speech. Typically, these approaches evaluate the toxicity of utterances in isolation, ignoring the context. Drawing on pragmatics, our study examines how contextual factors can influence the perceived toxicity of utterances, thereby anchoring assessments in a more nuanced semantic framework. We present Toximatics, a dataset that includes context-dependent utterances and their toxicity scores. We also introduce a novel synthetic data generation pipeline designed to create context-utterance pairs at scale with controlled polarity. This pipeline can enhance existing hate speech datasets by adding contextual information to utterances, either preserving or altering their polarity, and can also generate completely new pairs from seed statements. We utilised both features to create Toximatics. To address biases in state-of-the-art hate speech datasets, which often skew towards specific sensitive topics such as politics, race, and gender, we propose a method to generate neutral utterances typical of various social settings. These are then contextualized to show how neutrality can shift to toxicity or benignity depending on the surrounding context. The evaluation results clearly indicate that current models underperform on this dataset.
%R 10.18653/v1/2024.sigdial-1.65
%U https://aclanthology.org/2024.sigdial-1.65
%U https://doi.org/10.18653/v1/2024.sigdial-1.65
%P 770-785
Markdown (Informal)
[Toximatics: Towards Understanding Toxicity in Real-Life Social Situations](https://aclanthology.org/2024.sigdial-1.65) (Das & Balke, SIGDIAL 2024)