@inproceedings{flemings-etal-2025-estimating,
title = "Estimating Privacy Leakage of Augmented Contextual Knowledge in Language Models",
author = "Flemings, James and
Jiang, Bo and
Zhang, Wanrong and
Takhirov, Zafar and
Annavaram, Murali",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.1220/",
doi = "10.18653/v1/2025.acl-long.1220",
pages = "25092--25108",
ISBN = "979-8-89176-251-0",
abstract = "Language models (LMs) rely on their parametric knowledge augmented with relevant contextual knowledge for certain tasks, such as question answering. However, the contextual knowledge can contain private information that may be leaked when answering queries, and estimating this privacy leakage is not well understood. A straightforward approach of directly comparing an LM{'}s output to the contexts can overestimate the privacy risk, since the LM{'}s parametric knowledge might already contain the augmented contextual knowledge. To this end, we introduce \textit{context influence}, a metric that builds on differential privacy, a widely-adopted privacy notion, to estimate the privacy leakage of contextual knowledge during decoding. Our approach effectively measures how each subset of the context influences an LM{'}s response while separating the specific parametric knowledge of the LM. Using our context influence metric, we demonstrate that context privacy leakage occurs when contextual knowledge is out of distribution with respect to parametric knowledge. Moreover, we experimentally demonstrate how context influence properly attributes the privacy leakage to augmented contexts, and we evaluate how factors{--} such as model size, context size, generation position, etc.{--} affect context privacy leakage. The practical implications of our results will inform practitioners of the privacy risk associated with augmented contextual knowledge."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="flemings-etal-2025-estimating">
<titleInfo>
<title>Estimating Privacy Leakage of Augmented Contextual Knowledge in Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Flemings</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bo</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wanrong</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zafar</namePart>
<namePart type="family">Takhirov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Murali</namePart>
<namePart type="family">Annavaram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>Language models (LMs) rely on their parametric knowledge augmented with relevant contextual knowledge for certain tasks, such as question answering. However, the contextual knowledge can contain private information that may be leaked when answering queries, and estimating this privacy leakage is not well understood. A straightforward approach of directly comparing an LM’s output to the contexts can overestimate the privacy risk, since the LM’s parametric knowledge might already contain the augmented contextual knowledge. To this end, we introduce context influence, a metric that builds on differential privacy, a widely-adopted privacy notion, to estimate the privacy leakage of contextual knowledge during decoding. Our approach effectively measures how each subset of the context influences an LM’s response while separating the specific parametric knowledge of the LM. Using our context influence metric, we demonstrate that context privacy leakage occurs when contextual knowledge is out of distribution with respect to parametric knowledge. Moreover, we experimentally demonstrate how context influence properly attributes the privacy leakage to augmented contexts, and we evaluate how factors, such as model size, context size, generation position, etc., affect context privacy leakage. The practical implications of our results will inform practitioners of the privacy risk associated with augmented contextual knowledge.</abstract>
<identifier type="citekey">flemings-etal-2025-estimating</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.1220</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.1220/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>25092</start>
<end>25108</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Estimating Privacy Leakage of Augmented Contextual Knowledge in Language Models
%A Flemings, James
%A Jiang, Bo
%A Zhang, Wanrong
%A Takhirov, Zafar
%A Annavaram, Murali
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F flemings-etal-2025-estimating
%X Language models (LMs) rely on their parametric knowledge augmented with relevant contextual knowledge for certain tasks, such as question answering. However, the contextual knowledge can contain private information that may be leaked when answering queries, and estimating this privacy leakage is not well understood. A straightforward approach of directly comparing an LM’s output to the contexts can overestimate the privacy risk, since the LM’s parametric knowledge might already contain the augmented contextual knowledge. To this end, we introduce context influence, a metric that builds on differential privacy, a widely-adopted privacy notion, to estimate the privacy leakage of contextual knowledge during decoding. Our approach effectively measures how each subset of the context influences an LM’s response while separating the specific parametric knowledge of the LM. Using our context influence metric, we demonstrate that context privacy leakage occurs when contextual knowledge is out of distribution with respect to parametric knowledge. Moreover, we experimentally demonstrate how context influence properly attributes the privacy leakage to augmented contexts, and we evaluate how factors, such as model size, context size, generation position, etc., affect context privacy leakage. The practical implications of our results will inform practitioners of the privacy risk associated with augmented contextual knowledge.
%R 10.18653/v1/2025.acl-long.1220
%U https://aclanthology.org/2025.acl-long.1220/
%U https://doi.org/10.18653/v1/2025.acl-long.1220
%P 25092-25108
Markdown (Informal)
[Estimating Privacy Leakage of Augmented Contextual Knowledge in Language Models](https://aclanthology.org/2025.acl-long.1220/) (Flemings et al., ACL 2025)
ACL
James Flemings, Bo Jiang, Wanrong Zhang, Zafar Takhirov, and Murali Annavaram. 2025. Estimating Privacy Leakage of Augmented Contextual Knowledge in Language Models. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 25092–25108, Vienna, Austria. Association for Computational Linguistics.
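
For readers who want a concrete feel for the metric the abstract describes, below is a minimal Python sketch of a DP-flavored influence score: the largest shift in a response token's log-probability when the augmented context is dropped from the prompt. The model choice (distilgpt2), the prompt layout, the helper names, and the max-over-tokens aggregation are illustrative assumptions, not the paper's exact definition or implementation.

```python
# Hypothetical sketch of a "context influence" score: the largest shift in a
# response token's log-probability when the augmented context is removed.
# Model, prompt layout, and aggregation are assumptions for illustration only.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = AutoModelForCausalLM.from_pretrained("distilgpt2")
model.eval()

def next_token_logprobs(prompt: str) -> torch.Tensor:
    """Log-probabilities over the vocabulary for the token following `prompt`."""
    ids = tokenizer(prompt, return_tensors="pt").input_ids
    with torch.no_grad():
        logits = model(ids).logits[0, -1]
    return torch.log_softmax(logits, dim=-1)

def context_influence(context: str, query: str, response: str) -> float:
    """Max absolute log-probability shift on the response tokens when the
    context is dropped from the prompt (a DP-flavored leakage proxy)."""
    resp_ids = tokenizer(response, add_special_tokens=False).input_ids
    max_shift = 0.0
    for t, tok in enumerate(resp_ids):
        prefix = tokenizer.decode(resp_ids[:t])
        with_ctx = next_token_logprobs(context + "\n" + query + prefix)
        without_ctx = next_token_logprobs(query + prefix)
        max_shift = max(max_shift, abs(float(with_ctx[tok] - without_ctx[tok])))
    return max_shift

# Example: a high score suggests the response leans heavily on the (private) context.
ctx = "Patient note: Alice Smith was diagnosed with hypertension in 2021."
query = "\nQuestion: What condition does Alice Smith have?\nAnswer:"
print(context_influence(ctx, query, " Alice Smith has hypertension."))
```

A score near zero indicates the model would have produced the response tokens with similar probability from its parametric knowledge alone, which is the intuition the abstract gives for why naive output-to-context comparisons overestimate leakage.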