@inproceedings{shi-etal-2026-stamp,
title = "{STAMP}-{R}: Stylometric Text Anonymization with Memory-guided Policy Rewriting",
author = "Shi, Zhan and
Yuan, Yefeng and
Cheng, Liang and
Liu, Yuhong",
editor = "Habernal, Ivan and
Ghanavati, Sepideh and
Haghighi, Sara and
Ramesh, Krithika and
Igamberdiev, Timour and
Wilson, Shomir",
booktitle = "Proceedings of the Seventh Workshop on Privacy in Natural Language Processing",
month = jul,
year = "2026",
address = "San Diego, California",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.privatenlp-main.4/",
doi = "10.18653/v1/2026.privatenlp-main.4",
pages = "53--68",
ISBN = "979-8-89176-397-5",
abstract = "Modern machine learning systems rely heavily on large-scale textual data that often contain sensitive personal information. Although conventional anonymization techniques remove explicit identifiers, textual data remain vulnerable to authorship inference attacks that exploit persistent stylometric signals. Recent approaches leverage Large Language Models (LLMs) to rewrite text and obscure such signals, but they frequently overlook distinctive stylometric outliers and fail to achieve a favorable privacy{--}utility trade-off due to rigid, one-size-fits-all obfuscation strategies, while also incurring high computational costs. To address these challenges, we propose STAMP-R, a risk-adaptive reinforcement learning framework for instance-level authorship anonymization. We formulate anonymization as a risk-aware, instance-level style distribution shaping problem. Central to our approach is the Style Manifold Memory (SMM), which models the global stylistic landscape via prototype-based density estimation. SMM detects high-risk stylometric outliers and adaptively modulates a composite reward function, enabling stronger obfuscation for highly identifiable samples while preserving semantic fidelity for low-risk instances. We further distill a lightweight 3B-parameter model from a teacher LLM for efficient local deployment. Experiments show that STAMP-R reduces authorship re-identification risk while maintaining strong downstream utility."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shi-etal-2026-stamp">
<titleInfo>
<title>STAMP-R: Stylometric Text Anonymization with Memory-guided Policy Rewriting</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhan</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yefeng</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuhong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Workshop on Privacy in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Habernal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sepideh</namePart>
<namePart type="family">Ghanavati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Haghighi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Krithika</namePart>
<namePart type="family">Ramesh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timour</namePart>
<namePart type="family">Igamberdiev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shomir</namePart>
<namePart type="family">Wilson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-397-5</identifier>
</relatedItem>
<abstract>Modern machine learning systems rely heavily on large-scale textual data that often contain sensitive personal information. Although conventional anonymization techniques remove explicit identifiers, textual data remain vulnerable to authorship inference attacks that exploit persistent stylometric signals. Recent approaches leverage Large Language Models (LLMs) to rewrite text and obscure such signals, but they frequently overlook distinctive stylometric outliers and fail to achieve a favorable privacy–utility trade-off due to rigid, one-size-fits-all obfuscation strategies, while also incurring high computational costs. To address these challenges, we propose STAMP-R, a risk-adaptive reinforcement learning framework for instance-level authorship anonymization. We formulate anonymization as a risk-aware, instance-level style distribution shaping problem. Central to our approach is the Style Manifold Memory (SMM), which models the global stylistic landscape via prototype-based density estimation. SMM detects high-risk stylometric outliers and adaptively modulates a composite reward function, enabling stronger obfuscation for highly identifiable samples while preserving semantic fidelity for low-risk instances. We further distill a lightweight 3B-parameter model from a teacher LLM for efficient local deployment. Experiments show that STAMP-R reduces authorship re-identification risk while maintaining strong downstream utility.</abstract>
<identifier type="citekey">shi-etal-2026-stamp</identifier>
<identifier type="doi">10.18653/v1/2026.privatenlp-main.4</identifier>
<location>
<url>https://aclanthology.org/2026.privatenlp-main.4/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>53</start>
<end>68</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T STAMP-R: Stylometric Text Anonymization with Memory-guided Policy Rewriting
%A Shi, Zhan
%A Yuan, Yefeng
%A Cheng, Liang
%A Liu, Yuhong
%Y Habernal, Ivan
%Y Ghanavati, Sepideh
%Y Haghighi, Sara
%Y Ramesh, Krithika
%Y Igamberdiev, Timour
%Y Wilson, Shomir
%S Proceedings of the Seventh Workshop on Privacy in Natural Language Processing
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California
%@ 979-8-89176-397-5
%F shi-etal-2026-stamp
%X Modern machine learning systems rely heavily on large-scale textual data that often contain sensitive personal information. Although conventional anonymization techniques remove explicit identifiers, textual data remain vulnerable to authorship inference attacks that exploit persistent stylometric signals. Recent approaches leverage Large Language Models (LLMs) to rewrite text and obscure such signals, but they frequently overlook distinctive stylometric outliers and fail to achieve a favorable privacy–utility trade-off due to rigid, one-size-fits-all obfuscation strategies, while also incurring high computational costs. To address these challenges, we propose STAMP-R, a risk-adaptive reinforcement learning framework for instance-level authorship anonymization. We formulate anonymization as a risk-aware, instance-level style distribution shaping problem. Central to our approach is the Style Manifold Memory (SMM), which models the global stylistic landscape via prototype-based density estimation. SMM detects high-risk stylometric outliers and adaptively modulates a composite reward function, enabling stronger obfuscation for highly identifiable samples while preserving semantic fidelity for low-risk instances. We further distill a lightweight 3B-parameter model from a teacher LLM for efficient local deployment. Experiments show that STAMP-R reduces authorship re-identification risk while maintaining strong downstream utility.
%R 10.18653/v1/2026.privatenlp-main.4
%U https://aclanthology.org/2026.privatenlp-main.4/
%U https://doi.org/10.18653/v1/2026.privatenlp-main.4
%P 53-68
Markdown (Informal)
[STAMP-R: Stylometric Text Anonymization with Memory-guided Policy Rewriting](https://aclanthology.org/2026.privatenlp-main.4/) (Shi et al., PrivateNLP 2026)
ACL