@inproceedings{madaan-etal-2026-multi,
title = "Multi-Token Completion for Text Anonymization",
author = "Madaan, Pulkit and
Ramesh, Krithika and
Bauer, Lisa and
Peris, Charith and
Field, Anjalie",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-long.276/",
pages = "5894--5908",
ISBN = "979-8-89176-380-7",
abstract = "Text anonymization is a critical task for enabling research and development in high-stakes domains containing private data, like medicine, law, and social services. While much research has focused on redacting sensitive content from text, substantially less work has focused on what to replace redacted content with, which can enhance privacy and becomes increasingly important with greater levels of redaction. In this work, we formulate predicting replacements for sensitive spans as a research task with principled use-inspired evaluation criteria. We further propose a multi-token completion method for accomplishing this task that is designed to preserve consistency with low compute requirements, thus facilitating practitioners to anonymize data locally before sharing it externally. Human and automated annotations demonstrate that our approach produces more realistic text and better preserves utility than alternative infilling methods and differentially private mechanisms across multiple domains without retraining. Overall, our work explores the under-studied task of what to replace redacted content with and contributes grounded evaluations capturing utility, facilitating future work."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="madaan-etal-2026-multi">
<titleInfo>
<title>Multi-Token Completion for Text Anonymization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pulkit</namePart>
<namePart type="family">Madaan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Krithika</namePart>
<namePart type="family">Ramesh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lisa</namePart>
<namePart type="family">Bauer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charith</namePart>
<namePart type="family">Peris</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anjalie</namePart>
<namePart type="family">Field</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-380-7</identifier>
</relatedItem>
<abstract>Text anonymization is a critical task for enabling research and development in high-stakes domains containing private data, like medicine, law, and social services. While much research has focused on redacting sensitive content from text, substantially less work has focused on what to replace redacted content with, which can enhance privacy and becomes increasingly important with greater levels of redaction. In this work, we formulate predicting replacements for sensitive spans as a research task with principled use-inspired evaluation criteria. We further propose a multi-token completion method for accomplishing this task that is designed to preserve consistency with low compute requirements, thus facilitating practitioners to anonymize data locally before sharing it externally. Human and automated annotations demonstrate that our approach produces more realistic text and better preserves utility than alternative infilling methods and differentially private mechanisms across multiple domains without retraining. Overall, our work explores the under-studied task of what to replace redacted content with and contributes grounded evaluations capturing utility, facilitating future work.</abstract>
<identifier type="citekey">madaan-etal-2026-multi</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-long.276/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>5894</start>
<end>5908</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-Token Completion for Text Anonymization
%A Madaan, Pulkit
%A Ramesh, Krithika
%A Bauer, Lisa
%A Peris, Charith
%A Field, Anjalie
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-380-7
%F madaan-etal-2026-multi
%X Text anonymization is a critical task for enabling research and development in high-stakes domains containing private data, like medicine, law, and social services. While much research has focused on redacting sensitive content from text, substantially less work has focused on what to replace redacted content with, which can enhance privacy and becomes increasingly important with greater levels of redaction. In this work, we formulate predicting replacements for sensitive spans as a research task with principled use-inspired evaluation criteria. We further propose a multi-token completion method for accomplishing this task that is designed to preserve consistency with low compute requirements, thus facilitating practitioners to anonymize data locally before sharing it externally. Human and automated annotations demonstrate that our approach produces more realistic text and better preserves utility than alternative infilling methods and differentially private mechanisms across multiple domains without retraining. Overall, our work explores the under-studied task of what to replace redacted content with and contributes grounded evaluations capturing utility, facilitating future work.
%U https://aclanthology.org/2026.eacl-long.276/
%P 5894-5908
Markdown (Informal)
[Multi-Token Completion for Text Anonymization](https://aclanthology.org/2026.eacl-long.276/) (Madaan et al., EACL 2026)
ACL
- Pulkit Madaan, Krithika Ramesh, Lisa Bauer, Charith Peris, and Anjalie Field. 2026. Multi-Token Completion for Text Anonymization. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), pages 5894–5908, Rabat, Morocco. Association for Computational Linguistics.