@inproceedings{liong-etal-2024-unveiling,
title = "Unveiling Vulnerability of Self-Attention",
author = "Liong, Khai Jiet and
Wu, Hongqiu and
Zhao, Hai",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1496",
pages = "17225--17236",
abstract = "Pre-trained language models (PLMs) are shown to be vulnerable to minor word changes, which poses a significant threat to real-world systems. While previous studies directly focus on manipulating word inputs, they are limited by their means of generating adversarial samples, lacking generalization to versatile real-world attacks. This paper studies the basic structure of transformer-based PLMs, the self-attention (SA) mechanism. (1) We propose a powerful perturbation technique named {`}HackAttend,{'} which perturbs the attention scores within the SA matrices via meticulously crafted attention masks. We show that state-of-the-art PLMs fall into heavy vulnerability, with minor attention perturbations (1{\%}) resulting in a very high attack success rate (98{\%}). Our paper extends the conventional text attack of word perturbations to more general structural perturbations. (2) We introduce {`}S-Attend,{'} a novel smoothing technique that effectively makes SA robust via structural perturbations. We empirically demonstrate that this simple yet effective technique achieves robust performance on par with adversarial training when facing various text attackers.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="liong-etal-2024-unveiling">
    <titleInfo>
      <title>Unveiling Vulnerability of Self-Attention</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Khai</namePart>
      <namePart type="given">Jiet</namePart>
      <namePart type="family">Liong</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hongqiu</namePart>
      <namePart type="family">Wu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hai</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Pre-trained language models (PLMs) are shown to be vulnerable to minor word changes, which poses a significant threat to real-world systems. While previous studies directly focus on manipulating word inputs, they are limited by their means of generating adversarial samples, lacking generalization to versatile real-world attacks. This paper studies the basic structure of transformer-based PLMs, the self-attention (SA) mechanism. (1) We propose a powerful perturbation technique named ‘HackAttend,’ which perturbs the attention scores within the SA matrices via meticulously crafted attention masks. We show that state-of-the-art PLMs fall into heavy vulnerability, with minor attention perturbations (1%) resulting in a very high attack success rate (98%). Our paper extends the conventional text attack of word perturbations to more general structural perturbations. (2) We introduce ‘S-Attend,’ a novel smoothing technique that effectively makes SA robust via structural perturbations. We empirically demonstrate that this simple yet effective technique achieves robust performance on par with adversarial training when facing various text attackers.</abstract>
    <identifier type="citekey">liong-etal-2024-unveiling</identifier>
    <location>
      <url>https://aclanthology.org/2024.lrec-main.1496</url>
    </location>
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>17225</start>
        <end>17236</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Unveiling Vulnerability of Self-Attention
%A Liong, Khai Jiet
%A Wu, Hongqiu
%A Zhao, Hai
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F liong-etal-2024-unveiling
%X Pre-trained language models (PLMs) are shown to be vulnerable to minor word changes, which poses a significant threat to real-world systems. While previous studies directly focus on manipulating word inputs, they are limited by their means of generating adversarial samples, lacking generalization to versatile real-world attacks. This paper studies the basic structure of transformer-based PLMs, the self-attention (SA) mechanism. (1) We propose a powerful perturbation technique named ‘HackAttend,’ which perturbs the attention scores within the SA matrices via meticulously crafted attention masks. We show that state-of-the-art PLMs fall into heavy vulnerability, with minor attention perturbations (1%) resulting in a very high attack success rate (98%). Our paper extends the conventional text attack of word perturbations to more general structural perturbations. (2) We introduce ‘S-Attend,’ a novel smoothing technique that effectively makes SA robust via structural perturbations. We empirically demonstrate that this simple yet effective technique achieves robust performance on par with adversarial training when facing various text attackers.
%U https://aclanthology.org/2024.lrec-main.1496
%P 17225-17236
Markdown (Informal)
[Unveiling Vulnerability of Self-Attention](https://aclanthology.org/2024.lrec-main.1496) (Liong et al., LREC-COLING 2024)
ACL
Khai Jiet Liong, Hongqiu Wu, and Hai Zhao. 2024. Unveiling Vulnerability of Self-Attention. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 17225–17236, Torino, Italia. ELRA and ICCL.