% Workshop paper record. Acronyms and proper nouns in title/booktitle are
% brace-protected as whole words so sentence-casing styles keep their case.
% NOTE(review): address appears to hold the event location rather than the
% publisher's city -- confirm against the target style before changing it.
@inproceedings{li-etal-2026-exploratory,
  title     = {Exploratory As-Analyzed No-Detection of Culturally-Marked Predicate-Triggered {PII} Amplification in a Synthetic-{English} {RAG} Probe: A Predicate-Resource-Confounded Audit},
  author    = {Li, Yanhang and
               Fan, Zhichao and
               Zhuang, Zexin},
  editor    = {Ma, Weicheng and
               Vosoughi, Soroush and
               Gillani, Nabeel and
               Coto-Solano, Rolando},
  booktitle = {Proceedings of the 1st Workshop on Stereotypes Across Cultures in Language Technologies ({StereACuLT} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.stereacult-1.3/},
  pages     = {20--34},
  isbn      = {979-8-89176-408-8},
  abstract  = {We ask whether stereotype-loaded queries about culturally marked people leak more personal information from a retrieval-augmented generation (RAG) system than otherwise equivalent neutral queries. We pre-register a four-culture audit covering en-Anglo, es-LATAM, Arabic, and Hindi probes on a synthetic English PII corpus, comparing five paired query arms via the Stereotype-Trigger Leakage Delta (STLD). The locked confirmatory estimator was not run, so all reported tests are exploratory or sensitivity analyses, with deviations documented. We also identify a prompt-echo confound in the name-leakage metric: the model often re-emits the queried name, inflating apparent leakage without retrieval extraction. On cleaner non-name channels{---}email, phone, SSN-like identifier, and address{---}we find no stereotype-driven amplification for any culture after multiple-comparison correction. One name-included es-LATAM cell is significant in the negative direction, but matched-arm decomposition and an expanded culture-neutral control sensitivity suggest a high-leak control-predicate sampling artifact rather than a stereotype-treatment effect. Because the study is powered only for mid-sized effects and the culturally marked probe bank mixes stereotype content with cultural markers and heritage practices, we interpret the results as no detection{---}not evidence of no effect{---}of culturally marked predicate-triggered PII amplification under this synthetic-English RAG setting. The paper contributes a preregistered stereotype-as-privacy-side-channel test, diagnoses prompt-echo and predicate-resource confounds, and outlines release of the synthetic corpus, predicate bank, query generator, audit scripts, and analysis code upon acceptance.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2026-exploratory">
<titleInfo>
<title>Exploratory As-Analyzed No-Detection of Culturally-Marked Predicate-Triggered PII Amplification in a Synthetic-English RAG Probe: A Predicate-Resource-Confounded Audit</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yanhang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhichao</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zexin</namePart>
<namePart type="family">Zhuang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Stereotypes Across Cultures in Language Technologies (StereACuLT 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Weicheng</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soroush</namePart>
<namePart type="family">Vosoughi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nabeel</namePart>
<namePart type="family">Gillani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rolando</namePart>
<namePart type="family">Coto-Solano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-408-8</identifier>
</relatedItem>
<abstract>We ask whether stereotype-loaded queries about culturally marked people leak more personal information from a retrieval-augmented generation (RAG) system than otherwise equivalent neutral queries. We pre-register a four-culture audit covering en-Anglo, es-LATAM, Arabic, and Hindi probes on a synthetic English PII corpus, comparing five paired query arms via the Stereotype-Trigger Leakage Delta (STLD). The locked confirmatory estimator was not run, so all reported tests are exploratory or sensitivity analyses, with deviations documented. We also identify a prompt-echo confound in the name-leakage metric: the model often re-emits the queried name, inflating apparent leakage without retrieval extraction. On cleaner non-name channels—email, phone, SSN-like identifier, and address—we find no stereotype-driven amplification for any culture after multiple-comparison correction. One name-included es-LATAM cell is significant in the negative direction, but matched-arm decomposition and an expanded culture-neutral control sensitivity suggest a high-leak control-predicate sampling artifact rather than a stereotype-treatment effect. Because the study is powered only for mid-sized effects and the culturally marked probe bank mixes stereotype content with cultural markers and heritage practices, we interpret the results as no detection—not evidence of no effect—of culturally marked predicate-triggered PII amplification under this synthetic-English RAG setting. The paper contributes a preregistered stereotype-as-privacy-side-channel test, diagnoses prompt-echo and predicate-resource confounds, and outlines release of the synthetic corpus, predicate bank, query generator, audit scripts, and analysis code upon acceptance.</abstract>
<identifier type="citekey">li-etal-2026-exploratory</identifier>
<location>
<url>https://aclanthology.org/2026.stereacult-1.3/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>20</start>
<end>34</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploratory As-Analyzed No-Detection of Culturally-Marked Predicate-Triggered PII Amplification in a Synthetic-English RAG Probe: A Predicate-Resource-Confounded Audit
%A Li, Yanhang
%A Fan, Zhichao
%A Zhuang, Zexin
%Y Ma, Weicheng
%Y Vosoughi, Soroush
%Y Gillani, Nabeel
%Y Coto-Solano, Rolando
%S Proceedings of the 1st Workshop on Stereotypes Across Cultures in Language Technologies (StereACuLT 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-408-8
%F li-etal-2026-exploratory
%X We ask whether stereotype-loaded queries about culturally marked people leak more personal information from a retrieval-augmented generation (RAG) system than otherwise equivalent neutral queries. We pre-register a four-culture audit covering en-Anglo, es-LATAM, Arabic, and Hindi probes on a synthetic English PII corpus, comparing five paired query arms via the Stereotype-Trigger Leakage Delta (STLD). The locked confirmatory estimator was not run, so all reported tests are exploratory or sensitivity analyses, with deviations documented. We also identify a prompt-echo confound in the name-leakage metric: the model often re-emits the queried name, inflating apparent leakage without retrieval extraction. On cleaner non-name channels—email, phone, SSN-like identifier, and address—we find no stereotype-driven amplification for any culture after multiple-comparison correction. One name-included es-LATAM cell is significant in the negative direction, but matched-arm decomposition and an expanded culture-neutral control sensitivity suggest a high-leak control-predicate sampling artifact rather than a stereotype-treatment effect. Because the study is powered only for mid-sized effects and the culturally marked probe bank mixes stereotype content with cultural markers and heritage practices, we interpret the results as no detection—not evidence of no effect—of culturally marked predicate-triggered PII amplification under this synthetic-English RAG setting. The paper contributes a preregistered stereotype-as-privacy-side-channel test, diagnoses prompt-echo and predicate-resource confounds, and outlines release of the synthetic corpus, predicate bank, query generator, audit scripts, and analysis code upon acceptance.
%U https://aclanthology.org/2026.stereacult-1.3/
%P 20-34
Markdown (Informal)
[Exploratory As-Analyzed No-Detection of Culturally-Marked Predicate-Triggered PII Amplification in a Synthetic-English RAG Probe: A Predicate-Resource-Confounded Audit](https://aclanthology.org/2026.stereacult-1.3/) (Li et al., StereACuLT 2026)
ACL