@inproceedings{welbl-etal-2020-undersensitivity,
title = "Undersensitivity in Neural Reading Comprehension",
author = "Welbl, Johannes and
Minervini, Pasquale and
Bartolo, Max and
Stenetorp, Pontus and
Riedel, Sebastian",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.findings-emnlp.103",
doi = "10.18653/v1/2020.findings-emnlp.103",
pages = "1152--1165",
abstract = "Current reading comprehension methods generalise well to in-distribution test sets, yet perform poorly on adversarially selected data. Prior work on adversarial inputs typically studies model oversensitivity: semantically invariant text perturbations that cause a model{'}s prediction to change. Here we focus on the complementary problem: excessive prediction undersensitivity, where input text is meaningfully changed but the model{'}s prediction does not, even though it should. We formulate an adversarial attack which searches among semantic variations of the question for which a model erroneously predicts the same answer, and with even higher probability. We demonstrate that models trained on both SQuAD2.0 and NewsQA are vulnerable to this attack, and then investigate data augmentation and adversarial training as defences. Both substantially decrease adversarial vulnerability, which generalises to held-out data and held-out attack spaces. Addressing undersensitivity furthermore improves model robustness on the previously introduced ADDSENT and ADDONESENT datasets, and models generalise better when facing train / evaluation distribution mismatch: they are less prone to overly rely on shallow predictive cues present only in the training set, and outperform a conventional model by as much as 10.9{\%} F1.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="welbl-etal-2020-undersensitivity">
<titleInfo>
<title>Undersensitivity in Neural Reading Comprehension</title>
</titleInfo>
<name type="personal">
<namePart type="given">Johannes</namePart>
<namePart type="family">Welbl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pasquale</namePart>
<namePart type="family">Minervini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Max</namePart>
<namePart type="family">Bartolo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pontus</namePart>
<namePart type="family">Stenetorp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Riedel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2020</title>
</titleInfo>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Current reading comprehension methods generalise well to in-distribution test sets, yet perform poorly on adversarially selected data. Prior work on adversarial inputs typically studies model oversensitivity: semantically invariant text perturbations that cause a model’s prediction to change. Here we focus on the complementary problem: excessive prediction undersensitivity, where input text is meaningfully changed but the model’s prediction does not, even though it should. We formulate an adversarial attack which searches among semantic variations of the question for which a model erroneously predicts the same answer, and with even higher probability. We demonstrate that models trained on both SQuAD2.0 and NewsQA are vulnerable to this attack, and then investigate data augmentation and adversarial training as defences. Both substantially decrease adversarial vulnerability, which generalises to held-out data and held-out attack spaces. Addressing undersensitivity furthermore improves model robustness on the previously introduced ADDSENT and ADDONESENT datasets, and models generalise better when facing train / evaluation distribution mismatch: they are less prone to overly rely on shallow predictive cues present only in the training set, and outperform a conventional model by as much as 10.9% F1.</abstract>
<identifier type="citekey">welbl-etal-2020-undersensitivity</identifier>
<identifier type="doi">10.18653/v1/2020.findings-emnlp.103</identifier>
<location>
<url>https://aclanthology.org/2020.findings-emnlp.103</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>1152</start>
<end>1165</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Undersensitivity in Neural Reading Comprehension
%A Welbl, Johannes
%A Minervini, Pasquale
%A Bartolo, Max
%A Stenetorp, Pontus
%A Riedel, Sebastian
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Findings of the Association for Computational Linguistics: EMNLP 2020
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F welbl-etal-2020-undersensitivity
%X Current reading comprehension methods generalise well to in-distribution test sets, yet perform poorly on adversarially selected data. Prior work on adversarial inputs typically studies model oversensitivity: semantically invariant text perturbations that cause a model’s prediction to change. Here we focus on the complementary problem: excessive prediction undersensitivity, where input text is meaningfully changed but the model’s prediction does not, even though it should. We formulate an adversarial attack which searches among semantic variations of the question for which a model erroneously predicts the same answer, and with even higher probability. We demonstrate that models trained on both SQuAD2.0 and NewsQA are vulnerable to this attack, and then investigate data augmentation and adversarial training as defences. Both substantially decrease adversarial vulnerability, which generalises to held-out data and held-out attack spaces. Addressing undersensitivity furthermore improves model robustness on the previously introduced ADDSENT and ADDONESENT datasets, and models generalise better when facing train / evaluation distribution mismatch: they are less prone to overly rely on shallow predictive cues present only in the training set, and outperform a conventional model by as much as 10.9% F1.
%R 10.18653/v1/2020.findings-emnlp.103
%U https://aclanthology.org/2020.findings-emnlp.103
%U https://doi.org/10.18653/v1/2020.findings-emnlp.103
%P 1152-1165
Markdown (Informal)
[Undersensitivity in Neural Reading Comprehension](https://aclanthology.org/2020.findings-emnlp.103) (Welbl et al., Findings 2020)
ACL
- Johannes Welbl, Pasquale Minervini, Max Bartolo, Pontus Stenetorp, and Sebastian Riedel. 2020. Undersensitivity in Neural Reading Comprehension. In Findings of the Association for Computational Linguistics: EMNLP 2020, pages 1152–1165, Online. Association for Computational Linguistics.