% BibTeX record for the paper published at https://aclanthology.org/2025.findings-acl.910/
% (text outside an entry is ignored by BibTeX, so these lines act as comments).
@inproceedings{di-bonaventura-etal-2025-hatevolution,
  author    = {Di Bonaventura, Chiara and
               McGillivray, Barbara and
               He, Yulan and
               Mero{\~n}o-Pe{\~n}uela, Albert},
  title     = {Hatevolution: What Static Benchmarks Don't Tell Us},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2025},
  year      = {2025},
  month     = jul,
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {17695--17707},
  isbn      = {979-8-89176-256-5},
  doi       = {10.18653/v1/2025.findings-acl.910},
  url       = {https://aclanthology.org/2025.findings-acl.910/},
  abstract  = {Language changes over time, including in the hate speech domain, which evolves quickly following social dynamics and cultural shifts. While NLP research has investigated the impact of language evolution on model training and has proposed several solutions for it, its impact on model benchmarking remains under-explored. Yet, hate speech benchmarks play a crucial role to ensure model safety. In this paper, we empirically evaluate the robustness of 20 language models across two evolving hate speech experiments, and we show the temporal misalignment between static and time-sensitive evaluations. Our findings call for time-sensitive linguistic benchmarks in order to correctly and reliably evaluate language models in the hate speech domain.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="di-bonaventura-etal-2025-hatevolution">
    <titleInfo>
      <title>Hatevolution: What Static Benchmarks Don’t Tell Us</title>
    </titleInfo>
    <!-- Authors of the paper, in order. -->
    <name type="personal">
      <namePart type="given">Chiara</namePart>
      <namePart type="family">Di Bonaventura</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Barbara</namePart>
      <namePart type="family">McGillivray</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yulan</namePart>
      <namePart type="family">He</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Albert</namePart>
      <namePart type="family">Meroño-Peñuela</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Wanxiang</namePart>
        <namePart type="family">Che</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joyce</namePart>
        <namePart type="family">Nabende</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Shutova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohammad</namePart>
        <namePart type="given">Taher</namePart>
        <namePart type="family">Pilehvar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-256-5</identifier>
    </relatedItem>
    <abstract>Language changes over time, including in the hate speech domain, which evolves quickly following social dynamics and cultural shifts. While NLP research has investigated the impact of language evolution on model training and has proposed several solutions for it, its impact on model benchmarking remains under-explored. Yet, hate speech benchmarks play a crucial role to ensure model safety. In this paper, we empirically evaluate the robustness of 20 language models across two evolving hate speech experiments, and we show the temporal misalignment between static and time-sensitive evaluations. Our findings call for time-sensitive linguistic benchmarks in order to correctly and reliably evaluate language models in the hate speech domain.</abstract>
    <!-- Identifiers and canonical landing page for the paper. -->
    <identifier type="citekey">di-bonaventura-etal-2025-hatevolution</identifier>
    <identifier type="doi">10.18653/v1/2025.findings-acl.910</identifier>
    <location>
      <url>https://aclanthology.org/2025.findings-acl.910/</url>
    </location>
    <!-- Page range of the paper. -->
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>17695</start>
        <end>17707</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Hatevolution: What Static Benchmarks Don’t Tell Us
%A Di Bonaventura, Chiara
%A McGillivray, Barbara
%A He, Yulan
%A Meroño-Peñuela, Albert
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F di-bonaventura-etal-2025-hatevolution
%X Language changes over time, including in the hate speech domain, which evolves quickly following social dynamics and cultural shifts. While NLP research has investigated the impact of language evolution on model training and has proposed several solutions for it, its impact on model benchmarking remains under-explored. Yet, hate speech benchmarks play a crucial role to ensure model safety. In this paper, we empirically evaluate the robustness of 20 language models across two evolving hate speech experiments, and we show the temporal misalignment between static and time-sensitive evaluations. Our findings call for time-sensitive linguistic benchmarks in order to correctly and reliably evaluate language models in the hate speech domain.
%R 10.18653/v1/2025.findings-acl.910
%U https://aclanthology.org/2025.findings-acl.910/
%U https://doi.org/10.18653/v1/2025.findings-acl.910
%P 17695-17707
Markdown (Informal)
[Hatevolution: What Static Benchmarks Don’t Tell Us](https://aclanthology.org/2025.findings-acl.910/) (Di Bonaventura et al., Findings 2025)
ACL
- Chiara Di Bonaventura, Barbara McGillivray, Yulan He, and Albert Meroño-Peñuela. 2025. Hatevolution: What Static Benchmarks Don’t Tell Us. In Findings of the Association for Computational Linguistics: ACL 2025, pages 17695–17707, Vienna, Austria. Association for Computational Linguistics.