@inproceedings{li-etal-2026-privacy,
title = "Where Privacy Risk Lives in {E}nglish-Source Multilingual {RAG}: A Stage-Decomposed Audit Across Five Query Languages",
author = "Li, Yanhang and
Fan, Zhichao and
Zhuang, Zexin",
editor = "Huang, Kaiyu and
Mo, Fengran and
Chen, Pinzhen and
Jiang, Meng",
booktitle = "Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models ({M}e{LLM} 2026)",
month = jul,
year = "2026",
address = "San Diego, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.mellm-1.28/",
pages = "284--293",
ISBN = "979-8-89176-430-9",
abstract = "A common assumption holds that switching to a non-English language makes a multilingual RAG system easier to attack for personal information. On an English-source synthetic-PII corpus with five query languages and a two-stage defence (LLM input judge + regex output filter), the output-stage point estimates do not support that assumption: English has the highest observed unstructured-PII leak rate, and only English-vs-Swahili separates cleanly under our document-level bootstrap intervals. Once the input judge is added, residual leaks remain on Arabic and Swahili in this Qwen-mediated pipeline, and back-translating the query does not close the gap. Translator, judge, and generator share one model family, so we treat this as pipeline-conditional rather than a causal language ranking. As an oracle diagnostic on a separate n=17 multilingual-prompted-judge residual corner, attaching the gold corpus document to the input judge blocks 15/17 residual cells {---} a follow-up direction, not a deployed mitigation, since all BLOCK/ALLOW rates are on adversarial queries only and we measure no benign-query FPR or utility. The anonymous supplement contains code, corpora, queries, and per-trial JSONLs."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2026-privacy">
<titleInfo>
<title>Where Privacy Risk Lives in English-Source Multilingual RAG: A Stage-Decomposed Audit Across Five Query Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yanhang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhichao</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zexin</namePart>
<namePart type="family">Zhuang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models (MeLLM 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kaiyu</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fengran</namePart>
<namePart type="family">Mo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pinzhen</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meng</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-430-9</identifier>
</relatedItem>
<abstract>A common assumption holds that switching to a non-English language makes a multilingual RAG system easier to attack for personal information. On an English-source synthetic-PII corpus with five query languages and a two-stage defence (LLM input judge + regex output filter), the output-stage point estimates do not support that assumption: English has the highest observed unstructured-PII leak rate, and only English-vs-Swahili separates cleanly under our document-level bootstrap intervals. Once the input judge is added, residual leaks remain on Arabic and Swahili in this Qwen-mediated pipeline, and back-translating the query does not close the gap. Translator, judge, and generator share one model family, so we treat this as pipeline-conditional rather than a causal language ranking. As an oracle diagnostic on a separate n=17 multilingual-prompted-judge residual corner, attaching the gold corpus document to the input judge blocks 15/17 residual cells — a follow-up direction, not a deployed mitigation, since all BLOCK/ALLOW rates are on adversarial queries only and we measure no benign-query FPR or utility. The anonymous supplement contains code, corpora, queries, and per-trial JSONLs.</abstract>
<identifier type="citekey">li-etal-2026-privacy</identifier>
<location>
<url>https://aclanthology.org/2026.mellm-1.28/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>284</start>
<end>293</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Where Privacy Risk Lives in English-Source Multilingual RAG: A Stage-Decomposed Audit Across Five Query Languages
%A Li, Yanhang
%A Fan, Zhichao
%A Zhuang, Zexin
%Y Huang, Kaiyu
%Y Mo, Fengran
%Y Chen, Pinzhen
%Y Jiang, Meng
%S Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models (MeLLM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, United States
%@ 979-8-89176-430-9
%F li-etal-2026-privacy
%X A common assumption holds that switching to a non-English language makes a multilingual RAG system easier to attack for personal information. On an English-source synthetic-PII corpus with five query languages and a two-stage defence (LLM input judge + regex output filter), the output-stage point estimates do not support that assumption: English has the highest observed unstructured-PII leak rate, and only English-vs-Swahili separates cleanly under our document-level bootstrap intervals. Once the input judge is added, residual leaks remain on Arabic and Swahili in this Qwen-mediated pipeline, and back-translating the query does not close the gap. Translator, judge, and generator share one model family, so we treat this as pipeline-conditional rather than a causal language ranking. As an oracle diagnostic on a separate n=17 multilingual-prompted-judge residual corner, attaching the gold corpus document to the input judge blocks 15/17 residual cells — a follow-up direction, not a deployed mitigation, since all BLOCK/ALLOW rates are on adversarial queries only and we measure no benign-query FPR or utility. The anonymous supplement contains code, corpora, queries, and per-trial JSONLs.
%U https://aclanthology.org/2026.mellm-1.28/
%P 284-293
Markdown (Informal)
[Where Privacy Risk Lives in English-Source Multilingual RAG: A Stage-Decomposed Audit Across Five Query Languages](https://aclanthology.org/2026.mellm-1.28/) (Li et al., MeLLM 2026)
ACL