% BibTeX record; the citation key is shared with the MODS and EndNote records that follow.
@inproceedings{cuevas-etal-2025-anecdoctoring,
  title     = {Anecdoctoring: Automated Red-Teaming Across Language and Place},
  author    = {Cuevas, Alejandro and
               Dash, Saloni and
               Nayak, Bharat Kumar and
               Vann, Dan and
               Daepp, Madeleine I. G.},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.964/},
  doi       = {10.18653/v1/2025.emnlp-main.964},
  pages     = {19066--19085},
  isbn      = {979-8-89176-332-6},
  abstract  = {Disinformation is among the top risks of generative artificial intelligence (AI) misuse. Global adoption of generative AI necessitates red-teaming evaluations (i.e., systematic adversarial probing) that are robust across diverse languages and cultures, but red-teaming datasets are commonly US- and English-centric. To address this gap, we propose ``anecdoctoring'', a novel red-teaming approach that automatically generates adversarial prompts across languages and cultures. We collect misinformation claims from fact-checking websites in three languages (English, Spanish, and Hindi) and two geographies (US and India). We then cluster individual claims into broader narratives and characterize the resulting clusters with knowledge graphs, with which we augment an attacker LLM. Our method produces higher attack success rates and offers interpretability benefits relative to few-shot prompting. Results underscore the need for disinformation mitigations that scale globally and are grounded in real-world adversarial misuse.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for a single paper; the ID doubles as the citation key. -->
  <mods ID="cuevas-etal-2025-anecdoctoring">
    <titleInfo>
      <title>Anecdoctoring: Automated Red-Teaming Across Language and Place</title>
    </titleInfo>

    <!-- Authors of the paper. -->
    <name type="personal">
      <namePart type="given">Alejandro</namePart>
      <namePart type="family">Cuevas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Saloni</namePart>
      <namePart type="family">Dash</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bharat</namePart>
      <namePart type="given">Kumar</namePart>
      <namePart type="family">Nayak</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dan</namePart>
      <namePart type="family">Vann</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Madeleine</namePart>
      <namePart type="given">I</namePart>
      <namePart type="given">G</namePart>
      <namePart type="family">Daepp</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host publication: the proceedings volume, with its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Christos</namePart>
        <namePart type="family">Christodoulopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tanmoy</namePart>
        <namePart type="family">Chakraborty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Carolyn</namePart>
        <namePart type="family">Rose</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Violet</namePart>
        <namePart type="family">Peng</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-332-6</identifier>
    </relatedItem>

    <abstract>Disinformation is among the top risks of generative artificial intelligence (AI) misuse. Global adoption of generative AI necessitates red-teaming evaluations (i.e., systematic adversarial probing) that are robust across diverse languages and cultures, but red-teaming datasets are commonly US- and English-centric. To address this gap, we propose “anecdoctoring”, a novel red-teaming approach that automatically generates adversarial prompts across languages and cultures. We collect misinformation claims from fact-checking websites in three languages (English, Spanish, and Hindi) and two geographies (US and India). We then cluster individual claims into broader narratives and characterize the resulting clusters with knowledge graphs, with which we augment an attacker LLM. Our method produces higher attack success rates and offers interpretability benefits relative to few-shot prompting. Results underscore the need for disinformation mitigations that scale globally and are grounded in real-world adversarial misuse.</abstract>

    <!-- Identifiers and canonical landing page. -->
    <identifier type="citekey">cuevas-etal-2025-anecdoctoring</identifier>
    <identifier type="doi">10.18653/v1/2025.emnlp-main.964</identifier>
    <location>
      <url>https://aclanthology.org/2025.emnlp-main.964/</url>
    </location>

    <!-- Page extent of the paper. -->
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>19066</start>
        <end>19085</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Anecdoctoring: Automated Red-Teaming Across Language and Place
%A Cuevas, Alejandro
%A Dash, Saloni
%A Nayak, Bharat Kumar
%A Vann, Dan
%A Daepp, Madeleine I. G.
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F cuevas-etal-2025-anecdoctoring
%X Disinformation is among the top risks of generative artificial intelligence (AI) misuse. Global adoption of generative AI necessitates red-teaming evaluations (i.e., systematic adversarial probing) that are robust across diverse languages and cultures, but red-teaming datasets are commonly US- and English-centric. To address this gap, we propose “anecdoctoring”, a novel red-teaming approach that automatically generates adversarial prompts across languages and cultures. We collect misinformation claims from fact-checking websites in three languages (English, Spanish, and Hindi) and two geographies (US and India). We then cluster individual claims into broader narratives and characterize the resulting clusters with knowledge graphs, with which we augment an attacker LLM. Our method produces higher attack success rates and offers interpretability benefits relative to few-shot prompting. Results underscore the need for disinformation mitigations that scale globally and are grounded in real-world adversarial misuse.
%R 10.18653/v1/2025.emnlp-main.964
%U https://aclanthology.org/2025.emnlp-main.964/
%U https://doi.org/10.18653/v1/2025.emnlp-main.964
%P 19066-19085
Markdown (Informal)
[Anecdoctoring: Automated Red-Teaming Across Language and Place](https://aclanthology.org/2025.emnlp-main.964/) (Cuevas et al., EMNLP 2025)
ACL
- Alejandro Cuevas, Saloni Dash, Bharat Kumar Nayak, Dan Vann, and Madeleine I. G. Daepp. 2025. Anecdoctoring: Automated Red-Teaming Across Language and Place. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, pages 19066–19085, Suzhou, China. Association for Computational Linguistics.