@inproceedings{yang-li-2024-best,
title = "The Best Defense is Attack: Repairing Semantics in Textual Adversarial Examples",
author = "Yang, Heng and
Li, Ke",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.481",
pages = "8439--8457",
abstract = "Recent studies have revealed the vulnerability of pre-trained language models to adversarial attacks. Adversarial defense techniques have been proposed to reconstruct adversarial examples within feature or text spaces. However, these methods struggle to effectively repair the semantics in adversarial examples, resulting in unsatisfactory defense performance. To repair the semantics in adversarial examples, we introduce a novel approach named Reactive Perturbation Defocusing (Rapid), which employs an adversarial detector to identify the fake labels of adversarial examples and leverages adversarial attackers to repair the semantics in adversarial examples. Our extensive experimental results, conducted on four public datasets, demonstrate the consistent effectiveness of Rapid in various adversarial attack scenarios. For easy evaluation, we provide a click-to-run demo of Rapid at https://tinyurl.com/22ercuf8.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yang-li-2024-best">
<titleInfo>
<title>The Best Defense is Attack: Repairing Semantics in Textual Adversarial Examples</title>
</titleInfo>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ke</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent studies have revealed the vulnerability of pre-trained language models to adversarial attacks. Adversarial defense techniques have been proposed to reconstruct adversarial examples within feature or text spaces. However, these methods struggle to effectively repair the semantics in adversarial examples, resulting in unsatisfactory defense performance. To repair the semantics in adversarial examples, we introduce a novel approach named Reactive Perturbation Defocusing (Rapid), which employs an adversarial detector to identify the fake labels of adversarial examples and leverages adversarial attackers to repair the semantics in adversarial examples. Our extensive experimental results, conducted on four public datasets, demonstrate the consistent effectiveness of Rapid in various adversarial attack scenarios. For easy evaluation, we provide a click-to-run demo of Rapid at https://tinyurl.com/22ercuf8.</abstract>
<identifier type="citekey">yang-li-2024-best</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.481</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>8439</start>
<end>8457</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Best Defense is Attack: Repairing Semantics in Textual Adversarial Examples
%A Yang, Heng
%A Li, Ke
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F yang-li-2024-best
%X Recent studies have revealed the vulnerability of pre-trained language models to adversarial attacks. Adversarial defense techniques have been proposed to reconstruct adversarial examples within feature or text spaces. However, these methods struggle to effectively repair the semantics in adversarial examples, resulting in unsatisfactory defense performance. To repair the semantics in adversarial examples, we introduce a novel approach named Reactive Perturbation Defocusing (Rapid), which employs an adversarial detector to identify the fake labels of adversarial examples and leverages adversarial attackers to repair the semantics in adversarial examples. Our extensive experimental results, conducted on four public datasets, demonstrate the consistent effectiveness of Rapid in various adversarial attack scenarios. For easy evaluation, we provide a click-to-run demo of Rapid at https://tinyurl.com/22ercuf8.
%U https://aclanthology.org/2024.emnlp-main.481
%P 8439-8457
Markdown (Informal)
[The Best Defense is Attack: Repairing Semantics in Textual Adversarial Examples](https://aclanthology.org/2024.emnlp-main.481) (Yang & Li, EMNLP 2024)
ACL