BibTeX
@inproceedings{chen-etal-2022-balanced,
title = "Balanced Adversarial Training: Balancing Tradeoffs between Fickleness and Obstinacy in {NLP} Models",
author = "Chen, Hannah and
Ji, Yangfeng and
Evans, David",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-main.40",
doi = "10.18653/v1/2022.emnlp-main.40",
pages = "632--647",
abstract = "Traditional (fickle) adversarial examples involve finding a small perturbation that does not change an input{'}s true label but confuses the classifier into outputting a different prediction. Conversely, obstinate adversarial examples occur when an adversary finds a small perturbation that preserves the classifier{'}s prediction but changes the true label of an input. Adversarial training and certified robust training have shown some effectiveness in improving the robustness of machine learnt models to fickle adversarial examples. We show that standard adversarial training methods focused on reducing vulnerability to fickle adversarial examples may make a model more vulnerable to obstinate adversarial examples, with experiments for both natural language inference and paraphrase identification tasks. To counter this phenomenon, we introduce Balanced Adversarial Training, which incorporates contrastive learning to increase robustness against both fickle and obstinate adversarial examples.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="chen-etal-2022-balanced">
    <titleInfo>
      <title>Balanced Adversarial Training: Balancing Tradeoffs between Fickleness and Obstinacy in NLP Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Hannah</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yangfeng</namePart>
      <namePart type="family">Ji</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="family">Evans</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yoav</namePart>
        <namePart type="family">Goldberg</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zornitsa</namePart>
        <namePart type="family">Kozareva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yue</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Traditional (fickle) adversarial examples involve finding a small perturbation that does not change an input’s true label but confuses the classifier into outputting a different prediction. Conversely, obstinate adversarial examples occur when an adversary finds a small perturbation that preserves the classifier’s prediction but changes the true label of an input. Adversarial training and certified robust training have shown some effectiveness in improving the robustness of machine learnt models to fickle adversarial examples. We show that standard adversarial training methods focused on reducing vulnerability to fickle adversarial examples may make a model more vulnerable to obstinate adversarial examples, with experiments for both natural language inference and paraphrase identification tasks. To counter this phenomenon, we introduce Balanced Adversarial Training, which incorporates contrastive learning to increase robustness against both fickle and obstinate adversarial examples.</abstract>
    <identifier type="citekey">chen-etal-2022-balanced</identifier>
    <identifier type="doi">10.18653/v1/2022.emnlp-main.40</identifier>
    <location>
      <url>https://aclanthology.org/2022.emnlp-main.40</url>
    </location>
    <part>
      <date>2022-12</date>
      <extent unit="page">
        <start>632</start>
        <end>647</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Balanced Adversarial Training: Balancing Tradeoffs between Fickleness and Obstinacy in NLP Models
%A Chen, Hannah
%A Ji, Yangfeng
%A Evans, David
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F chen-etal-2022-balanced
%X Traditional (fickle) adversarial examples involve finding a small perturbation that does not change an input’s true label but confuses the classifier into outputting a different prediction. Conversely, obstinate adversarial examples occur when an adversary finds a small perturbation that preserves the classifier’s prediction but changes the true label of an input. Adversarial training and certified robust training have shown some effectiveness in improving the robustness of machine learnt models to fickle adversarial examples. We show that standard adversarial training methods focused on reducing vulnerability to fickle adversarial examples may make a model more vulnerable to obstinate adversarial examples, with experiments for both natural language inference and paraphrase identification tasks. To counter this phenomenon, we introduce Balanced Adversarial Training, which incorporates contrastive learning to increase robustness against both fickle and obstinate adversarial examples.
%R 10.18653/v1/2022.emnlp-main.40
%U https://aclanthology.org/2022.emnlp-main.40
%U https://doi.org/10.18653/v1/2022.emnlp-main.40
%P 632-647
Markdown (Informal)
[Balanced Adversarial Training: Balancing Tradeoffs between Fickleness and Obstinacy in NLP Models](https://aclanthology.org/2022.emnlp-main.40) (Chen et al., EMNLP 2022)
ACL
Hannah Chen, Yangfeng Ji, and David Evans. 2022. Balanced Adversarial Training: Balancing Tradeoffs between Fickleness and Obstinacy in NLP Models. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pages 632–647, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.