@inproceedings{gabriel-etal-2022-naturaladversaries,
    title = "{N}atural{A}dversaries: Can Naturalistic Adversaries Be as Effective as Artificial Adversaries?",
    author = "Gabriel, Saadia and
      Palangi, Hamid and
      Choi, Yejin",
    editor = "Goldberg, Yoav and
      Kozareva, Zornitsa and
      Zhang, Yue",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
    month = dec,
    year = "2022",
    address = "Abu Dhabi, United Arab Emirates",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.findings-emnlp.413",
    doi = "10.18653/v1/2022.findings-emnlp.413",
    pages = "5635--5645",
    abstract = "While a substantial body of prior work has explored adversarial example generation for natural language understanding tasks, these examples are often unrealistic and diverge from the real-world data distributions. In this work, we introduce a two-stage adversarial example generation framework (NaturalAdversaries), for designing adversaries that are effective at fooling a given classifier and demonstrate natural-looking failure cases that could plausibly occur during in-the-wild deployment of the models. At the first stage a token attribution method is used to summarize a given classifier{'}s behavior as a function of the key tokens in the input. In the second stage a generative model is conditioned on the key tokens from the first stage. NaturalAdversaries is adaptable to both black-box and white-box adversarial attacks based on the level of access to the model parameters. Our results indicate these adversaries generalize across domains, and offer insights for future research on improving robustness of neural text classification models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="gabriel-etal-2022-naturaladversaries">
    <titleInfo>
      <title>NaturalAdversaries: Can Naturalistic Adversaries Be as Effective as Artificial Adversaries?</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Saadia</namePart>
      <namePart type="family">Gabriel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hamid</namePart>
      <namePart type="family">Palangi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yejin</namePart>
      <namePart type="family">Choi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yoav</namePart>
        <namePart type="family">Goldberg</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zornitsa</namePart>
        <namePart type="family">Kozareva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yue</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>While a substantial body of prior work has explored adversarial example generation for natural language understanding tasks, these examples are often unrealistic and diverge from the real-world data distributions. In this work, we introduce a two-stage adversarial example generation framework (NaturalAdversaries), for designing adversaries that are effective at fooling a given classifier and demonstrate natural-looking failure cases that could plausibly occur during in-the-wild deployment of the models. At the first stage a token attribution method is used to summarize a given classifier’s behavior as a function of the key tokens in the input. In the second stage a generative model is conditioned on the key tokens from the first stage. NaturalAdversaries is adaptable to both black-box and white-box adversarial attacks based on the level of access to the model parameters. Our results indicate these adversaries generalize across domains, and offer insights for future research on improving robustness of neural text classification models.</abstract>
    <identifier type="citekey">gabriel-etal-2022-naturaladversaries</identifier>
    <identifier type="doi">10.18653/v1/2022.findings-emnlp.413</identifier>
    <location>
      <url>https://aclanthology.org/2022.findings-emnlp.413</url>
    </location>
    <part>
      <date>2022-12</date>
      <extent unit="page">
        <start>5635</start>
        <end>5645</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T NaturalAdversaries: Can Naturalistic Adversaries Be as Effective as Artificial Adversaries?
%A Gabriel, Saadia
%A Palangi, Hamid
%A Choi, Yejin
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F gabriel-etal-2022-naturaladversaries
%X While a substantial body of prior work has explored adversarial example generation for natural language understanding tasks, these examples are often unrealistic and diverge from the real-world data distributions. In this work, we introduce a two-stage adversarial example generation framework (NaturalAdversaries), for designing adversaries that are effective at fooling a given classifier and demonstrate natural-looking failure cases that could plausibly occur during in-the-wild deployment of the models. At the first stage a token attribution method is used to summarize a given classifier’s behavior as a function of the key tokens in the input. In the second stage a generative model is conditioned on the key tokens from the first stage. NaturalAdversaries is adaptable to both black-box and white-box adversarial attacks based on the level of access to the model parameters. Our results indicate these adversaries generalize across domains, and offer insights for future research on improving robustness of neural text classification models.
%R 10.18653/v1/2022.findings-emnlp.413
%U https://aclanthology.org/2022.findings-emnlp.413
%U https://doi.org/10.18653/v1/2022.findings-emnlp.413
%P 5635-5645
Markdown (Informal)
[NaturalAdversaries: Can Naturalistic Adversaries Be as Effective as Artificial Adversaries?](https://aclanthology.org/2022.findings-emnlp.413) (Gabriel et al., Findings 2022)
ACL
Saadia Gabriel, Hamid Palangi, and Yejin Choi. 2022. NaturalAdversaries: Can Naturalistic Adversaries Be as Effective as Artificial Adversaries?. In Findings of the Association for Computational Linguistics: EMNLP 2022, pages 5635–5645, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.