@inproceedings{wang-cheng-2024-guardemb,
title = "{G}uard{E}mb: Dynamic Watermark for Safeguarding Large Language Model Embedding Service Against Model Stealing Attack",
author = "Wang, Liaoyaqi and
Cheng, Minhao",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.441",
pages = "7518--7534",
abstract = "Large language model (LLM) companies provide Embedding as a Service (EaaS) to assist the individual in efficiently dealing with downstream tasks such as text classification and recommendation. However, recent works reveal the risk of the model stealing attack, posing a financial threat to EaaS providers. To protect the copyright of EaaS, we propose GuardEmb, a dynamic embedding watermarking method, striking a balance between enhancing watermark detectability and preserving embedding functionality. Our approach involves selecting special tokens and perturbing embeddings containing these tokens to inject watermarks. Simultaneously, we train a verifier to detect these watermarks. In the event of an attacker attempting to replicate our EaaS for profit, their model inherits our watermarks. For watermark verification, we construct verification texts to query the suspicious EaaS, and the verifier identifies our watermarks within the responses, effectively tracing copyright infringement. Extensive experiments across diverse datasets showcase the high detectability of our watermark method, even in out-of-distribution scenarios, without compromising embedding functionality. Our code is publicly available at https://github.com/Melodramass/Dynamic-Watermark.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-cheng-2024-guardemb">
<titleInfo>
<title>GuardEmb: Dynamic Watermark for Safeguarding Large Language Model Embedding Service Against Model Stealing Attack</title>
</titleInfo>
<name type="personal">
<namePart type="given">Liaoyaqi</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minhao</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language model (LLM) companies provide Embedding as a Service (EaaS) to assist the individual in efficiently dealing with downstream tasks such as text classification and recommendation. However, recent works reveal the risk of the model stealing attack, posing a financial threat to EaaS providers. To protect the copyright of EaaS, we propose GuardEmb, a dynamic embedding watermarking method, striking a balance between enhancing watermark detectability and preserving embedding functionality. Our approach involves selecting special tokens and perturbing embeddings containing these tokens to inject watermarks. Simultaneously, we train a verifier to detect these watermarks. In the event of an attacker attempting to replicate our EaaS for profit, their model inherits our watermarks. For watermark verification, we construct verification texts to query the suspicious EaaS, and the verifier identifies our watermarks within the responses, effectively tracing copyright infringement. Extensive experiments across diverse datasets showcase the high detectability of our watermark method, even in out-of-distribution scenarios, without compromising embedding functionality. Our code is publicly available at https://github.com/Melodramass/Dynamic-Watermark.</abstract>
<identifier type="citekey">wang-cheng-2024-guardemb</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.441</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>7518</start>
<end>7534</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GuardEmb: Dynamic Watermark for Safeguarding Large Language Model Embedding Service Against Model Stealing Attack
%A Wang, Liaoyaqi
%A Cheng, Minhao
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F wang-cheng-2024-guardemb
%X Large language model (LLM) companies provide Embedding as a Service (EaaS) to assist the individual in efficiently dealing with downstream tasks such as text classification and recommendation. However, recent works reveal the risk of the model stealing attack, posing a financial threat to EaaS providers. To protect the copyright of EaaS, we propose GuardEmb, a dynamic embedding watermarking method, striking a balance between enhancing watermark detectability and preserving embedding functionality. Our approach involves selecting special tokens and perturbing embeddings containing these tokens to inject watermarks. Simultaneously, we train a verifier to detect these watermarks. In the event of an attacker attempting to replicate our EaaS for profit, their model inherits our watermarks. For watermark verification, we construct verification texts to query the suspicious EaaS, and the verifier identifies our watermarks within the responses, effectively tracing copyright infringement. Extensive experiments across diverse datasets showcase the high detectability of our watermark method, even in out-of-distribution scenarios, without compromising embedding functionality. Our code is publicly available at https://github.com/Melodramass/Dynamic-Watermark.
%U https://aclanthology.org/2024.findings-emnlp.441
%P 7518-7534
Markdown (Informal)
[GuardEmb: Dynamic Watermark for Safeguarding Large Language Model Embedding Service Against Model Stealing Attack](https://aclanthology.org/2024.findings-emnlp.441) (Wang & Cheng, Findings 2024)
ACL