% Conference paper in the CLiC-it 2024 proceedings; the url field points at the ACL Anthology record.
% Acronyms in the title are brace-protected as whole words so sentence-casing styles keep their capitals.
% NOTE(review): address looks like the conference venue rather than the publisher's city -- confirm before reuse.
@inproceedings{bruno-etal-2024-towards,
  title     = {Towards a Hate Speech Index with Attention-based {LSTMs} and {XLM-RoBERTa}},
  author    = {Bruno, Mauro and
               Catanese, Elena and
               Ortame, Francesco},
  editor    = {Dell'Orletta, Felice and
               Lenci, Alessandro and
               Montemagni, Simonetta and
               Sprugnoli, Rachele},
  booktitle = {Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)},
  month     = dec,
  year      = {2024},
  address   = {Pisa, Italy},
  publisher = {CEUR Workshop Proceedings},
  url       = {https://aclanthology.org/2024.clicit-1.14/},
  pages     = {106--113},
  isbn      = {979-12-210-7060-6},
  abstract  = {The uncontrolled diffusion of hate speech on social media requires robust detection mechanisms to measure its harmful impact. Analyzing texts from X (formerly Twitter) is challenging due to slang, neologisms, and sarcasm, which require advanced and intelligent detection approaches. While sophisticated models like large language models (LLMs) demonstrate impressive accuracy, their prohibitive inference times make it impractical to process millions of tweets. Therefore, we propose a mixed approach using a bidirectional long short-term memory model with an added attention mechanism (AT-BiLSTM) for improved natural language understanding. We benchmark this model against a standard BiLSTM model and a fine-tuned multilingual robustly optimized BERT (RoBERTa). The task of hate speech detection has been extensively explored in the EVALITA campaigns, which have achieved impressive results. Building on this foundation, we aim to develop a robust classifier to predict the content of approximately 20 million tweets related to immigration. The performance of our models is comparable to the top entries from the EVALITA campaigns, and we show the effects of training different networks on the dynamics of the Hate Speech Index (HSI). We also utilize a custom labeled dataset for benchmarking and training.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!--
  MODS record for the paper "Towards a Hate Speech Index with Attention-based
  LSTMs and XLM-RoBERTa". The top-level <name> elements are the authors; the
  <relatedItem type="host"> element describes the proceedings volume (title,
  editors, publisher, place, ISBN); <part> carries the page range in that volume.
-->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="bruno-etal-2024-towards">
    <titleInfo>
      <title>Towards a Hate Speech Index with Attention-based LSTMs and XLM-RoBERTa</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Mauro</namePart>
      <namePart type="family">Bruno</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Elena</namePart>
      <namePart type="family">Catanese</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Francesco</namePart>
      <namePart type="family">Ortame</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Felice</namePart>
        <namePart type="family">Dell’Orletta</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Simonetta</namePart>
        <namePart type="family">Montemagni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rachele</namePart>
        <namePart type="family">Sprugnoli</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>CEUR Workshop Proceedings</publisher>
        <place>
          <placeTerm type="text">Pisa, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-12-210-7060-6</identifier>
    </relatedItem>
    <abstract>The uncontrolled diffusion of hate speech on social media requires robust detection mechanisms to measure its harmful impact. Analyzing texts from X (formerly Twitter) is challenging due to slang, neologisms, and sarcasm, which require advanced and intelligent detection approaches. While sophisticated models like large language models (LLMs) demonstrate impressive accuracy, their prohibitive inference times make it impractical to process millions of tweets. Therefore, we propose a mixed approach using a bidirectional long short-term memory model with an added attention mechanism (AT-BiLSTM) for improved natural language understanding. We benchmark this model against a standard BiLSTM model and a fine-tuned multilingual robustly optimized BERT (RoBERTa). The task of hate speech detection has been extensively explored in the EVALITA campaigns, which have achieved impressive results. Building on this foundation, we aim to develop a robust classifier to predict the content of approximately 20 million tweets related to immigration. The performance of our models is comparable to the top entries from the EVALITA campaigns, and we show the effects of training different networks on the dynamics of the Hate Speech Index (HSI). We also utilize a custom labeled dataset for benchmarking and training.</abstract>
    <identifier type="citekey">bruno-etal-2024-towards</identifier>
    <location>
      <url>https://aclanthology.org/2024.clicit-1.14/</url>
    </location>
    <!-- Page range of the paper within the host volume -->
    <part>
      <date>2024-12</date>
      <extent unit="page">
        <start>106</start>
        <end>113</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Towards a Hate Speech Index with Attention-based LSTMs and XLM-RoBERTa
%A Bruno, Mauro
%A Catanese, Elena
%A Ortame, Francesco
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F bruno-etal-2024-towards
%X The uncontrolled diffusion of hate speech on social media requires robust detection mechanisms to measure its harmful impact. Analyzing texts from X (formerly Twitter) is challenging due to slang, neologisms, and sarcasm, which require advanced and intelligent detection approaches. While sophisticated models like large language models (LLMs) demonstrate impressive accuracy, their prohibitive inference times make it impractical to process millions of tweets. Therefore, we propose a mixed approach using a bidirectional long short-term memory model with an added attention mechanism (AT-BiLSTM) for improved natural language understanding. We benchmark this model against a standard BiLSTM model and a fine-tuned multilingual robustly optimized BERT (RoBERTa). The task of hate speech detection has been extensively explored in the EVALITA campaigns, which have achieved impressive results. Building on this foundation, we aim to develop a robust classifier to predict the content of approximately 20 million tweets related to immigration. The performance of our models is comparable to the top entries from the EVALITA campaigns, and we show the effects of training different networks on the dynamics of the Hate Speech Index (HSI). We also utilize a custom labeled dataset for benchmarking and training.
%U https://aclanthology.org/2024.clicit-1.14/
%P 106-113
Markdown (Informal)
[Towards a Hate Speech Index with Attention-based LSTMs and XLM-RoBERTa](https://aclanthology.org/2024.clicit-1.14/) (Bruno et al., CLiC-it 2024)
ACL