@inproceedings{rusert-srinivasan-2022-dont,
title = "Don{'}t sweat the small stuff, classify the rest: Sample Shielding to protect text classifiers against adversarial attacks",
author = "Rusert, Jonathan and
Srinivasan, Padmini",
editor = "Carpuat, Marine and
de Marneffe, Marie-Catherine and
Meza Ruiz, Ivan Vladimir",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-main.195",
doi = "10.18653/v1/2022.naacl-main.195",
pages = "2716--2725",
abstract = "Deep learning (DL) is being used extensively for text classification. However, researchers have demonstrated the vulnerability of such classifiers to adversarial attacks. Attackers modify the text in a way which misleads the classifier while keeping the original meaning close to intact. State-of-the-art (SOTA) attack algorithms follow the general principle of making minimal changes to the text so as to not jeopardize semantics. Taking advantage of this we propose a novel and intuitive defense strategy called Sample Shielding.It is attacker and classifier agnostic, does not require any reconfiguration of the classifier or external resources and is simple to implement. Essentially, we sample subsets of the input text, classify them and summarize these into a final decision. We shield three popular DL text classifiers with Sample Shielding, test their resilience against four SOTA attackers across three datasets in a realistic threat setting. Even when given the advantage of knowing about our shielding strategy the adversary{'}s attack success rate is {\textless}=10{\%} with only one exception and often {\textless} 5{\%}. Additionally, Sample Shielding maintains near original accuracy when applied to original texts. Crucially, we show that the {`}make minimal changes{'} approach of SOTA attackers leads to critical vulnerabilities that can be defended against with an intuitive sampling strategy.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rusert-srinivasan-2022-dont">
<titleInfo>
<title>Don’t sweat the small stuff, classify the rest: Sample Shielding to protect text classifiers against adversarial attacks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">Rusert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Padmini</namePart>
<namePart type="family">Srinivasan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="given">Vladimir</namePart>
<namePart type="family">Meza Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Deep learning (DL) is being used extensively for text classification. However, researchers have demonstrated the vulnerability of such classifiers to adversarial attacks. Attackers modify the text in a way which misleads the classifier while keeping the original meaning close to intact. State-of-the-art (SOTA) attack algorithms follow the general principle of making minimal changes to the text so as to not jeopardize semantics. Taking advantage of this we propose a novel and intuitive defense strategy called Sample Shielding.It is attacker and classifier agnostic, does not require any reconfiguration of the classifier or external resources and is simple to implement. Essentially, we sample subsets of the input text, classify them and summarize these into a final decision. We shield three popular DL text classifiers with Sample Shielding, test their resilience against four SOTA attackers across three datasets in a realistic threat setting. Even when given the advantage of knowing about our shielding strategy the adversary’s attack success rate is \textless=10% with only one exception and often \textless 5%. Additionally, Sample Shielding maintains near original accuracy when applied to original texts. Crucially, we show that the ‘make minimal changes’ approach of SOTA attackers leads to critical vulnerabilities that can be defended against with an intuitive sampling strategy.</abstract>
<identifier type="citekey">rusert-srinivasan-2022-dont</identifier>
<identifier type="doi">10.18653/v1/2022.naacl-main.195</identifier>
<location>
<url>https://aclanthology.org/2022.naacl-main.195</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>2716</start>
<end>2725</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Don’t sweat the small stuff, classify the rest: Sample Shielding to protect text classifiers against adversarial attacks
%A Rusert, Jonathan
%A Srinivasan, Padmini
%Y Carpuat, Marine
%Y de Marneffe, Marie-Catherine
%Y Meza Ruiz, Ivan Vladimir
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F rusert-srinivasan-2022-dont
%X Deep learning (DL) is being used extensively for text classification. However, researchers have demonstrated the vulnerability of such classifiers to adversarial attacks. Attackers modify the text in a way which misleads the classifier while keeping the original meaning close to intact. State-of-the-art (SOTA) attack algorithms follow the general principle of making minimal changes to the text so as to not jeopardize semantics. Taking advantage of this we propose a novel and intuitive defense strategy called Sample Shielding.It is attacker and classifier agnostic, does not require any reconfiguration of the classifier or external resources and is simple to implement. Essentially, we sample subsets of the input text, classify them and summarize these into a final decision. We shield three popular DL text classifiers with Sample Shielding, test their resilience against four SOTA attackers across three datasets in a realistic threat setting. Even when given the advantage of knowing about our shielding strategy the adversary’s attack success rate is \textless=10% with only one exception and often \textless 5%. Additionally, Sample Shielding maintains near original accuracy when applied to original texts. Crucially, we show that the ‘make minimal changes’ approach of SOTA attackers leads to critical vulnerabilities that can be defended against with an intuitive sampling strategy.
%R 10.18653/v1/2022.naacl-main.195
%U https://aclanthology.org/2022.naacl-main.195
%U https://doi.org/10.18653/v1/2022.naacl-main.195
%P 2716-2725
Markdown (Informal)
[Don’t sweat the small stuff, classify the rest: Sample Shielding to protect text classifiers against adversarial attacks](https://aclanthology.org/2022.naacl-main.195) (Rusert & Srinivasan, NAACL 2022)
ACL