@inproceedings{bennie-etal-2025-panda,
title = "{PANDA} - Paired Anti-hate Narratives Dataset from {A}sia: Using an {LLM}-as-a-Judge to Create the First {C}hinese Counterspeech Dataset",
author = "Bennie, Michael and
Zhang, Demi and
Xiao, Bushi and
Cao, Jing and
Liu, Chryseis Xinyi and
Meng, Jian and
Tripp, Alayo",
editor = "Bonaldi, Helena and
Vallecillo-Rodr{\'i}guez, Mar{\'i}a Estrella and
Zubiaga, Irune and
Montejo-R{\'a}ez, Arturo and
Soroa, Aitor and
Mart{\'i}n-Valdivia, Mar{\'i}a Teresa and
Guerini, Marco and
Agerri, Rodrigo",
booktitle = "Proceedings of the First Workshop on Multilingual Counterspeech Generation",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.mcg-1.1/",
pages = "1--12",
abstract = "Despite the global prevalence of Modern Standard Chinese language, counterspeech (CS) resources for Chinese remain virtually nonexistent. To address this gap in East Asian counterspeech research we introduce the a corpus of Modern Standard Mandarin counterspeech that focuses on combating hate speech in Mainland China. This paper proposes a novel approach of generating CS by using an LLM-as-a-Judge, simulated annealing, LLMs zero-shot CN generation and a round-robin algorithm. This is followed by manual verification for quality and contextual relevance. This paper details the methodology for creating effective counterspeech in Chinese and other non-Eurocentric languages, including unique cultural patterns of which groups are maligned and linguistic patterns in what kinds of discourse markers are programmatically marked as hate speech (HS). Analysis of the generated corpora, we provide strong evidence for the lack of open-source, properly labeled Chinese hate speech data and the limitations of using an LLM-as-Judge to score possible answers in Chinese. Moreover, the present corpus servers as the first East Asian language based CS corpus and provides an essential resource for future research on counterspeech generation and evaluation."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bennie-etal-2025-panda">
<titleInfo>
<title>PANDA - Paired Anti-hate Narratives Dataset from Asia: Using an LLM-as-a-Judge to Create the First Chinese Counterspeech Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Bennie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Demi</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bushi</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chryseis</namePart>
<namePart type="given">Xinyi</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jian</namePart>
<namePart type="family">Meng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alayo</namePart>
<namePart type="family">Tripp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Multilingual Counterspeech Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Bonaldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">María</namePart>
<namePart type="given">Estrella</namePart>
<namePart type="family">Vallecillo-Rodríguez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Irune</namePart>
<namePart type="family">Zubiaga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arturo</namePart>
<namePart type="family">Montejo-Ráez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aitor</namePart>
<namePart type="family">Soroa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">María</namePart>
<namePart type="given">Teresa</namePart>
<namePart type="family">Martín-Valdivia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Guerini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rodrigo</namePart>
<namePart type="family">Agerri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite the global prevalence of Modern Standard Chinese language, counterspeech (CS) resources for Chinese remain virtually nonexistent. To address this gap in East Asian counterspeech research we introduce the a corpus of Modern Standard Mandarin counterspeech that focuses on combating hate speech in Mainland China. This paper proposes a novel approach of generating CS by using an LLM-as-a-Judge, simulated annealing, LLMs zero-shot CN generation and a round-robin algorithm. This is followed by manual verification for quality and contextual relevance. This paper details the methodology for creating effective counterspeech in Chinese and other non-Eurocentric languages, including unique cultural patterns of which groups are maligned and linguistic patterns in what kinds of discourse markers are programmatically marked as hate speech (HS). Analysis of the generated corpora, we provide strong evidence for the lack of open-source, properly labeled Chinese hate speech data and the limitations of using an LLM-as-Judge to score possible answers in Chinese. Moreover, the present corpus servers as the first East Asian language based CS corpus and provides an essential resource for future research on counterspeech generation and evaluation.</abstract>
<identifier type="citekey">bennie-etal-2025-panda</identifier>
<location>
<url>https://aclanthology.org/2025.mcg-1.1/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>1</start>
<end>12</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PANDA - Paired Anti-hate Narratives Dataset from Asia: Using an LLM-as-a-Judge to Create the First Chinese Counterspeech Dataset
%A Bennie, Michael
%A Zhang, Demi
%A Xiao, Bushi
%A Cao, Jing
%A Liu, Chryseis Xinyi
%A Meng, Jian
%A Tripp, Alayo
%Y Bonaldi, Helena
%Y Vallecillo-Rodríguez, María Estrella
%Y Zubiaga, Irune
%Y Montejo-Ráez, Arturo
%Y Soroa, Aitor
%Y Martín-Valdivia, María Teresa
%Y Guerini, Marco
%Y Agerri, Rodrigo
%S Proceedings of the First Workshop on Multilingual Counterspeech Generation
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F bennie-etal-2025-panda
%X Despite the global prevalence of Modern Standard Chinese language, counterspeech (CS) resources for Chinese remain virtually nonexistent. To address this gap in East Asian counterspeech research we introduce the a corpus of Modern Standard Mandarin counterspeech that focuses on combating hate speech in Mainland China. This paper proposes a novel approach of generating CS by using an LLM-as-a-Judge, simulated annealing, LLMs zero-shot CN generation and a round-robin algorithm. This is followed by manual verification for quality and contextual relevance. This paper details the methodology for creating effective counterspeech in Chinese and other non-Eurocentric languages, including unique cultural patterns of which groups are maligned and linguistic patterns in what kinds of discourse markers are programmatically marked as hate speech (HS). Analysis of the generated corpora, we provide strong evidence for the lack of open-source, properly labeled Chinese hate speech data and the limitations of using an LLM-as-Judge to score possible answers in Chinese. Moreover, the present corpus servers as the first East Asian language based CS corpus and provides an essential resource for future research on counterspeech generation and evaluation.
%U https://aclanthology.org/2025.mcg-1.1/
%P 1-12
Markdown (Informal)
[PANDA - Paired Anti-hate Narratives Dataset from Asia: Using an LLM-as-a-Judge to Create the First Chinese Counterspeech Dataset](https://aclanthology.org/2025.mcg-1.1/) (Bennie et al., MCG 2025)
ACL