% BullyBench paper, EMNLP 2025 Industry Track (ACL Anthology ID 2025.emnlp-industry.152).
% Names are stored in "Last, First" form so multi-word surnames parse unambiguously.
% NOTE(review): the surname spellings McCashin and O'Higgins Norman were restored from
% flattened forms in the export; confirm against the paper PDF.
@inproceedings{verma-etal-2025-bullybench,
  title     = {{BullyBench}: Youth {\&} Experts-in-the-loop Framework for Intrinsic and Extrinsic Cyberbullying {NLP} Benchmarking},
  author    = {Verma, Kanishk and
               Balaaji, Sri and
               Wagner, Joachim and
               Kazemi, Arefeh and
               McCashin, Darragh and
               Walsh, Isobel and
               Basak, Sayani and
               Asci, Sinan and
               Cherkasova, Yelena and
               Poulis, Alexandros and
               O'Higgins Norman, James and
               Umbach, Rebecca and
               Milosevic, Tijana and
               Davis, Brian},
  editor    = {Potdar, Saloni and
               Rojas-Barahona, Lina and
               Montella, Sebastien},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track},
  month     = nov,
  year      = {2025},
  address   = {Suzhou (China)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-industry.152/},
  pages     = {2172--2208},
  isbn      = {979-8-89176-333-3},
  abstract  = {Cyberbullying (CB) involves complex relational dynamics that are often oversimplified as a binary classification task. Existing youth-focused CB datasets rely on scripted role-play, lacking conversational realism and ethical youth involvement, with little or no evaluation of their social plausibility. To address this, we introduce a youth-in-the-loop dataset ``BullyBench'' developed by adolescents (ages 15--16) through an ethical co-research framework. We introduce a structured intrinsic quality evaluation with experts-in-the-loop (social scientists, psychologists, and content moderators) for assessing realism, relevance, and coherence in youth CB data. Additionally, we perform extrinsic baseline evaluation of this dataset by benchmarking encoder- and decoder-only language models for multi-class CB role classification for future research. A three-stage annotation process by young adults refines the dataset into a gold-standard test benchmark, a high-quality resource grounded in minors' lived experiences of CB detection. Code and data are available for review.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="verma-etal-2025-bullybench">
<titleInfo>
<title>BullyBench: Youth &amp; Experts-in-the-loop Framework for Intrinsic and Extrinsic Cyberbullying NLP Benchmarking</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kanishk</namePart>
<namePart type="family">Verma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sri</namePart>
<namePart type="family">Balaaji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joachim</namePart>
<namePart type="family">Wagner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arefeh</namePart>
<namePart type="family">Kazemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Darragh</namePart>
<namePart type="family">McCashin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isobel</namePart>
<namePart type="family">Walsh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sayani</namePart>
<namePart type="family">Basak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sinan</namePart>
<namePart type="family">Asci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yelena</namePart>
<namePart type="family">Cherkasova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandros</namePart>
<namePart type="family">Poulis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">O'Higgins Norman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Umbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tijana</namePart>
<namePart type="family">Milosevic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Davis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saloni</namePart>
<namePart type="family">Potdar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lina</namePart>
<namePart type="family">Rojas-Barahona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastien</namePart>
<namePart type="family">Montella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou (China)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-333-3</identifier>
</relatedItem>
<abstract>Cyberbullying (CB) involves complex relational dynamics that are often oversimplified as a binary classification task. Existing youth-focused CB datasets rely on scripted role-play, lacking conversational realism and ethical youth involvement, with little or no evaluation of their social plausibility. To address this, we introduce a youth-in-the-loop dataset “BullyBench” developed by adolescents (ages 15–16) through an ethical co-research framework. We introduce a structured intrinsic quality evaluation with experts-in-the-loop (social scientists, psychologists, and content moderators) for assessing realism, relevance, and coherence in youth CB data. Additionally, we perform extrinsic baseline evaluation of this dataset by benchmarking encoder- and decoder-only language models for multi-class CB role classification for future research. A three-stage annotation process by young adults refines the dataset into a gold-standard test benchmark, a high-quality resource grounded in minors’ lived experiences of CB detection. Code and data are available for review</abstract>
<identifier type="citekey">verma-etal-2025-bullybench</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-industry.152/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>2172</start>
<end>2208</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BullyBench: Youth & Experts-in-the-loop Framework for Intrinsic and Extrinsic Cyberbullying NLP Benchmarking
%A Verma, Kanishk
%A Balaaji, Sri
%A Wagner, Joachim
%A Kazemi, Arefeh
%A McCashin, Darragh
%A Walsh, Isobel
%A Basak, Sayani
%A Asci, Sinan
%A Cherkasova, Yelena
%A Poulis, Alexandros
%A O'Higgins Norman, James
%A Umbach, Rebecca
%A Milosevic, Tijana
%A Davis, Brian
%Y Potdar, Saloni
%Y Rojas-Barahona, Lina
%Y Montella, Sebastien
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou (China)
%@ 979-8-89176-333-3
%F verma-etal-2025-bullybench
%X Cyberbullying (CB) involves complex relational dynamics that are often oversimplified as a binary classification task. Existing youth-focused CB datasets rely on scripted role-play, lacking conversational realism and ethical youth involvement, with little or no evaluation of their social plausibility. To address this, we introduce a youth-in-the-loop dataset “BullyBench” developed by adolescents (ages 15–16) through an ethical co-research framework. We introduce a structured intrinsic quality evaluation with experts-in-the-loop (social scientists, psychologists, and content moderators) for assessing realism, relevance, and coherence in youth CB data. Additionally, we perform extrinsic baseline evaluation of this dataset by benchmarking encoder- and decoder-only language models for multi-class CB role classification for future research. A three-stage annotation process by young adults refines the dataset into a gold-standard test benchmark, a high-quality resource grounded in minors’ lived experiences of CB detection. Code and data are available for review
%U https://aclanthology.org/2025.emnlp-industry.152/
%P 2172-2208
Markdown (Informal)
[BullyBench: Youth & Experts-in-the-loop Framework for Intrinsic and Extrinsic Cyberbullying NLP Benchmarking](https://aclanthology.org/2025.emnlp-industry.152/) (Verma et al., EMNLP 2025)
ACL
- Kanishk Verma, Sri Balaaji, Joachim Wagner, Arefeh Kazemi, Darragh McCashin, Isobel Walsh, Sayani Basak, Sinan Asci, Yelena Cherkasova, Alexandros Poulis, James O'Higgins Norman, Rebecca Umbach, Tijana Milosevic, and Brian Davis. 2025. BullyBench: Youth & Experts-in-the-loop Framework for Intrinsic and Extrinsic Cyberbullying NLP Benchmarking. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 2172–2208, Suzhou (China). Association for Computational Linguistics.