@inproceedings{ma-etal-2026-protecting,
title = "Protecting Language Models Against Unauthorized Distillation through Trace Rewriting",
author = "Ma, Xinhang and
Yeoh, William and
Zhang, Ning and
Vorobeychik, Yevgeniy",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-long.519/",
pages = "11307--11324",
ISBN = "979-8-89176-390-6",
abstract = "Knowledge distillation is a widely adopted technique for transferring capabilities from LLMs to smaller, more efficient student models. However, unauthorized use of knowledge distillation takes unfair advantage of the considerable effort and cost put into developing frontier models. We investigate methods for modifying teacher-generated reasoning traces to achieve two objectives that deter unauthorized distillation: (1) \textit{anti-distillation}, or degrading the training usefulness of query responses, and (2) \textit{API watermarking}, which embeds verifiable signatures in student models. We introduce several approaches for dynamically rewriting a teacher{'}s reasoning outputs while preserving answer correctness and semantic coherence. Two of these leverage the rewriting capabilities of LLMs, while others use gradient-based techniques. Our experiments show that a simple instruction-based rewriting approach achieves a strong anti-distillation effect while maintaining or even improving teacher performance. Furthermore, we show that our rewriting approach also enables embedding watermarks that can be reliably detected with essentially no false alarms. Our code is available at \url{https://github.com/xhOwenMa/trace-rewriting}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ma-etal-2026-protecting">
<titleInfo>
<title>Protecting Language Models Against Unauthorized Distillation through Trace Rewriting</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xinhang</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="family">Yeoh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ning</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yevgeniy</namePart>
<namePart type="family">Vorobeychik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-390-6</identifier>
</relatedItem>
<abstract>Knowledge distillation is a widely adopted technique for transferring capabilities from LLMs to smaller, more efficient student models. However, unauthorized use of knowledge distillation takes unfair advantage of the considerable effort and cost put into developing frontier models. We investigate methods for modifying teacher-generated reasoning traces to achieve two objectives that deter unauthorized distillation: (1) anti-distillation, or degrading the training usefulness of query responses, and (2) API watermarking, which embeds verifiable signatures in student models. We introduce several approaches for dynamically rewriting a teacher’s reasoning outputs while preserving answer correctness and semantic coherence. Two of these leverage the rewriting capabilities of LLMs, while others use gradient-based techniques. Our experiments show that a simple instruction-based rewriting approach achieves a strong anti-distillation effect while maintaining or even improving teacher performance. Furthermore, we show that our rewriting approach also enables embedding watermarks that can be reliably detected with essentially no false alarms. Our code is available at https://github.com/xhOwenMa/trace-rewriting.</abstract>
<identifier type="citekey">ma-etal-2026-protecting</identifier>
<location>
<url>https://aclanthology.org/2026.acl-long.519/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>11307</start>
<end>11324</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Protecting Language Models Against Unauthorized Distillation through Trace Rewriting
%A Ma, Xinhang
%A Yeoh, William
%A Zhang, Ning
%A Vorobeychik, Yevgeniy
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F ma-etal-2026-protecting
%X Knowledge distillation is a widely adopted technique for transferring capabilities from LLMs to smaller, more efficient student models. However, unauthorized use of knowledge distillation takes unfair advantage of the considerable effort and cost put into developing frontier models. We investigate methods for modifying teacher-generated reasoning traces to achieve two objectives that deter unauthorized distillation: (1) anti-distillation, or degrading the training usefulness of query responses, and (2) API watermarking, which embeds verifiable signatures in student models. We introduce several approaches for dynamically rewriting a teacher’s reasoning outputs while preserving answer correctness and semantic coherence. Two of these leverage the rewriting capabilities of LLMs, while others use gradient-based techniques. Our experiments show that a simple instruction-based rewriting approach achieves a strong anti-distillation effect while maintaining or even improving teacher performance. Furthermore, we show that our rewriting approach also enables embedding watermarks that can be reliably detected with essentially no false alarms. Our code is available at https://github.com/xhOwenMa/trace-rewriting.
%U https://aclanthology.org/2026.acl-long.519/
%P 11307-11324
Markdown (Informal)
[Protecting Language Models Against Unauthorized Distillation through Trace Rewriting](https://aclanthology.org/2026.acl-long.519/) (Ma et al., ACL 2026)
ACL