@inproceedings{ajith-etal-2024-downstream,
title = "Downstream Trade-offs of a Family of Text Watermarks",
author = "Ajith, Anirudh and
Singh, Sameer and
Pruthi, Danish",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.821",
pages = "14039--14053",
abstract = "Watermarking involves implanting an imperceptible signal into generated text that can later be detected via statistical tests. A prominent family of watermarking strategies for LLMs embeds this signal by upsampling a (pseudorandomly-chosen) subset of tokens at every generation step. However, such signals alter the model{'}s output distribution and can have unintended effects on its downstream performance. In this work, we evaluate the performance of LLMs watermarked using three different strategies over a diverse suite of tasks including those cast as k-class classification (CLS), multiple choice question answering (MCQ), short-form generation (e.g., open-ended question answering) and long-form generation (e.g., translation) tasks. We find that watermarks (under realistic hyperparameters) can cause significant drops in LLMs{'} effective utility across all tasks. We observe drops of 10 to 20{\%} in CLS tasks in the average case, which shoot up to 100{\%} in the worst case. We notice degradations of about 7{\%} in MCQ tasks, 10-15{\%} in short-form generation, and 5-15{\%} in long-form generation tasks. Our findings highlight the trade-offs that users should be cognizant of when using watermarked models.",
}
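
The abstract above describes the watermarking mechanism only in prose. As a concrete illustration, here is a minimal sketch of the family it alludes to, assuming a Kirchenbauer et al. (2023)-style "green list" scheme: at each generation step a pseudorandomly chosen subset of the vocabulary is upsampled by adding a logit bias, and detection later checks whether generated tokens land in those subsets more often than chance. The vocabulary size, `GAMMA`, `DELTA`, and the hash-based seeding are illustrative assumptions, not the implementations the paper evaluates.

```python
import hashlib
import math
import random

VOCAB_SIZE = 50_000   # assumed vocabulary size, for illustration only
GAMMA = 0.5           # fraction of the vocabulary placed on the "green list"
DELTA = 2.0           # logit bias added to green tokens ("realistic" strength)

def green_list(prev_token: int) -> set[int]:
    """Pseudorandomly choose a vocabulary subset, seeded by the previous
    token so that a detector (with the key) can reproduce the choice."""
    digest = hashlib.sha256(prev_token.to_bytes(4, "big")).digest()
    rng = random.Random(int.from_bytes(digest[:8], "big"))
    return set(rng.sample(range(VOCAB_SIZE), int(GAMMA * VOCAB_SIZE)))

def watermark_logits(logits: list[float], prev_token: int) -> list[float]:
    """Upsample the green subset by adding DELTA to its logits; sampling
    from the biased distribution embeds the signal in the output text."""
    greens = green_list(prev_token)
    return [x + DELTA if i in greens else x for i, x in enumerate(logits)]

def detect_z(tokens: list[int]) -> float:
    """One-proportion z-test: how far does the green-token count exceed
    the GAMMA fraction expected in unwatermarked text?"""
    n = len(tokens) - 1
    hits = sum(tok in green_list(prev) for prev, tok in zip(tokens, tokens[1:]))
    return (hits - GAMMA * n) / math.sqrt(n * GAMMA * (1 - GAMMA))
```

Under a scheme like this, a larger `DELTA` makes the detector's z-score climb faster but distorts the model's output distribution more, which is precisely the utility trade-off the paper quantifies across CLS, MCQ, short-form, and long-form tasks.
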
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ajith-etal-2024-downstream">
    <titleInfo>
      <title>Downstream Trade-offs of a Family of Text Watermarks</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Anirudh</namePart>
      <namePart type="family">Ajith</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sameer</namePart>
      <namePart type="family">Singh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Danish</namePart>
      <namePart type="family">Pruthi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yaser</namePart>
        <namePart type="family">Al-Onaizan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohit</namePart>
        <namePart type="family">Bansal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yun-Nung</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, Florida, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Watermarking involves implanting an imperceptible signal into generated text that can later be detected via statistical tests. A prominent family of watermarking strategies for LLMs embeds this signal by upsampling a (pseudorandomly-chosen) subset of tokens at every generation step. However, such signals alter the model’s output distribution and can have unintended effects on its downstream performance. In this work, we evaluate the performance of LLMs watermarked using three different strategies over a diverse suite of tasks including those cast as k-class classification (CLS), multiple choice question answering (MCQ), short-form generation (e.g., open-ended question answering) and long-form generation (e.g., translation) tasks. We find that watermarks (under realistic hyperparameters) can cause significant drops in LLMs’ effective utility across all tasks. We observe drops of 10 to 20% in CLS tasks in the average case, which shoot up to 100% in the worst case. We notice degradations of about 7% in MCQ tasks, 10-15% in short-form generation, and 5-15% in long-form generation tasks. Our findings highlight the trade-offs that users should be cognizant of when using watermarked models.</abstract>
    <identifier type="citekey">ajith-etal-2024-downstream</identifier>
    <location>
      <url>https://aclanthology.org/2024.findings-emnlp.821</url>
    </location>
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>14039</start>
        <end>14053</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Downstream Trade-offs of a Family of Text Watermarks
%A Ajith, Anirudh
%A Singh, Sameer
%A Pruthi, Danish
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F ajith-etal-2024-downstream
%X Watermarking involves implanting an imperceptible signal into generated text that can later be detected via statistical tests. A prominent family of watermarking strategies for LLMs embeds this signal by upsampling a (pseudorandomly-chosen) subset of tokens at every generation step. However, such signals alter the model’s output distribution and can have unintended effects on its downstream performance. In this work, we evaluate the performance of LLMs watermarked using three different strategies over a diverse suite of tasks including those cast as k-class classification (CLS), multiple choice question answering (MCQ), short-form generation (e.g., open-ended question answering) and long-form generation (e.g., translation) tasks. We find that watermarks (under realistic hyperparameters) can cause significant drops in LLMs’ effective utility across all tasks. We observe drops of 10 to 20% in CLS tasks in the average case, which shoot up to 100% in the worst case. We notice degradations of about 7% in MCQ tasks, 10-15% in short-form generation, and 5-15% in long-form generation tasks. Our findings highlight the trade-offs that users should be cognizant of when using watermarked models.
%U https://aclanthology.org/2024.findings-emnlp.821
%P 14039-14053
Markdown (Informal):
[Downstream Trade-offs of a Family of Text Watermarks](https://aclanthology.org/2024.findings-emnlp.821) (Ajith et al., Findings 2024)

ACL:
Anirudh Ajith, Sameer Singh, and Danish Pruthi. 2024. Downstream Trade-offs of a Family of Text Watermarks. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 14039–14053, Miami, Florida, USA. Association for Computational Linguistics.