@inproceedings{zhang-etal-2023-slowbert,
title = "{S}low{BERT}: Slow-down Attacks on Input-adaptive Multi-exit {BERT}",
author = "Zhang, Shengyao and
Pan, Xudong and
Zhang, Mi and
Yang, Min",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.634/",
doi = "10.18653/v1/2023.findings-acl.634",
pages = "9992--10007",
    abstract = "For pretrained language models such as Google's BERT, recent research designs several input-adaptive inference mechanisms to improve the efficiency on cloud and edge devices. In this paper, we reveal a new attack surface on input-adaptive multi-exit BERT, where the adversary imperceptibly modifies the input texts to drastically increase the average inference cost. Our proposed slow-down attack called \textit{SlowBERT} integrates a new rank-and-substitute adversarial text generation algorithm to efficiently search for the perturbation which maximally delays the exiting time. With no direct access to the model internals, we further devise a \textit{time-based approximation algorithm} to infer the exit position as the loss oracle. Our extensive evaluation on two popular instances of multi-exit BERT for GLUE classification tasks validates the effectiveness of SlowBERT. In the worst case, SlowBERT increases the inference cost by $4.57\times$, which would strongly hurt the service quality of multi-exit BERT in practice, e.g., increasing the real-time cloud services' response times for online users."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2023-slowbert">
<titleInfo>
<title>SlowBERT: Slow-down Attacks on Input-adaptive Multi-exit BERT</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shengyao</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xudong</namePart>
<namePart type="family">Pan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mi</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>For pretrained language models such as Google’s BERT, recent research designs several input-adaptive inference mechanisms to improve the efficiency on cloud and edge devices. In this paper, we reveal a new attack surface on input-adaptive multi-exit BERT, where the adversary imperceptibly modifies the input texts to drastically increase the average inference cost. Our proposed slow-down attack called SlowBERT integrates a new rank-and-substitute adversarial text generation algorithm to efficiently search for the perturbation which maximally delays the exiting time. With no direct access to the model internals, we further devise a time-based approximation algorithm to infer the exit position as the loss oracle. Our extensive evaluation on two popular instances of multi-exit BERT for GLUE classification tasks validates the effectiveness of SlowBERT. In the worst case, SlowBERT increases the inference cost by 4.57×, which would strongly hurt the service quality of multi-exit BERT in practice, e.g., increasing the real-time cloud services’ response times for online users.</abstract>
<identifier type="citekey">zhang-etal-2023-slowbert</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.634</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.634/</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>9992</start>
<end>10007</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SlowBERT: Slow-down Attacks on Input-adaptive Multi-exit BERT
%A Zhang, Shengyao
%A Pan, Xudong
%A Zhang, Mi
%A Yang, Min
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F zhang-etal-2023-slowbert
%X For pretrained language models such as Google’s BERT, recent research designs several input-adaptive inference mechanisms to improve the efficiency on cloud and edge devices. In this paper, we reveal a new attack surface on input-adaptive multi-exit BERT, where the adversary imperceptibly modifies the input texts to drastically increase the average inference cost. Our proposed slow-down attack called SlowBERT integrates a new rank-and-substitute adversarial text generation algorithm to efficiently search for the perturbation which maximally delays the exiting time. With no direct access to the model internals, we further devise a time-based approximation algorithm to infer the exit position as the loss oracle. Our extensive evaluation on two popular instances of multi-exit BERT for GLUE classification tasks validates the effectiveness of SlowBERT. In the worst case, SlowBERT increases the inference cost by 4.57×, which would strongly hurt the service quality of multi-exit BERT in practice, e.g., increasing the real-time cloud services’ response times for online users.
%R 10.18653/v1/2023.findings-acl.634
%U https://aclanthology.org/2023.findings-acl.634/
%U https://doi.org/10.18653/v1/2023.findings-acl.634
%P 9992-10007
Markdown (Informal)
[SlowBERT: Slow-down Attacks on Input-adaptive Multi-exit BERT](https://aclanthology.org/2023.findings-acl.634/) (Zhang et al., Findings 2023)
ACL
Shengyao Zhang, Xudong Pan, Mi Zhang, and Min Yang. 2023. [SlowBERT: Slow-down Attacks on Input-adaptive Multi-exit BERT](https://aclanthology.org/2023.findings-acl.634/). In Findings of the Association for Computational Linguistics: ACL 2023, pages 9992–10007, Toronto, Canada. Association for Computational Linguistics.
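
For context, the abstract describes input-adaptive multi-exit inference: each encoder layer carries an exit classifier, and computation stops as soon as one exit is confident enough, so a slow-down attack works by keeping every exit below its confidence threshold and forcing the full stack to run. The sketch below is a minimal, self-contained illustration of that early-exit loop; it is not the authors' SlowBERT implementation, and the toy encoder, layer count, hidden size, threshold, and random weights are assumptions made purely for illustration.

```python
# Illustrative sketch of multi-exit (early-exit) inference, NOT the SlowBERT code.
# Each "layer" has an exit classifier; inference stops once the exit's softmax
# confidence crosses THRESHOLD. A slow-down perturbation would be chosen so that
# no exit becomes confident, so every layer executes and inference cost grows.
import numpy as np

rng = np.random.default_rng(0)

NUM_LAYERS = 12      # assumed depth of the multi-exit encoder
HIDDEN = 32          # toy hidden size (assumption)
NUM_CLASSES = 2      # e.g. a binary GLUE classification task
THRESHOLD = 0.75     # assumed exit-confidence threshold

# Random stand-ins for per-layer transformations and per-layer exit heads.
layer_weights = [rng.standard_normal((HIDDEN, HIDDEN)) / np.sqrt(HIDDEN)
                 for _ in range(NUM_LAYERS)]
exit_heads = [rng.standard_normal((HIDDEN, NUM_CLASSES)) / np.sqrt(HIDDEN)
              for _ in range(NUM_LAYERS)]

def softmax(z):
    z = z - z.max()
    e = np.exp(z)
    return e / e.sum()

def multi_exit_inference(x):
    """Run layers until an exit head is confident; return (prediction, layers used)."""
    h = x
    for depth, (w, head) in enumerate(zip(layer_weights, exit_heads), start=1):
        h = np.tanh(h @ w)                  # toy per-layer transformation
        probs = softmax(h @ head)           # early-exit classifier at this layer
        if probs.max() >= THRESHOLD:        # confident enough -> exit early
            return int(probs.argmax()), depth
    return int(probs.argmax()), NUM_LAYERS  # no exit fired: whole stack was used

if __name__ == "__main__":
    benign_input = rng.standard_normal(HIDDEN)
    pred, layers_used = multi_exit_inference(benign_input)
    print(f"prediction={pred}, layers executed={layers_used}/{NUM_LAYERS}")
    # A slow-down attack searches for an imperceptible perturbation that keeps
    # layers_used at NUM_LAYERS, inflating the average inference cost.
```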