@inproceedings{kamath-etal-2020-selective,
    title = "Selective Question Answering under Domain Shift",
    author = "Kamath, Amita  and
      Jia, Robin  and
      Liang, Percy",
    booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics",
    month = jul,
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.acl-main.503",
    doi = "10.18653/v1/2020.acl-main.503",
    pages = "5684--5696",
    abstract = "To avoid giving wrong answers, question answering (QA) models need to know when to abstain from answering. Moreover, users often ask questions that diverge from the model{'}s training data, making errors more likely and thus abstention more critical. In this work, we propose the setting of selective question answering under domain shift, in which a QA model is tested on a mixture of in-domain and out-of-domain data, and must answer (i.e., not abstain on) as many questions as possible while maintaining high accuracy. Abstention policies based solely on the model{'}s softmax probabilities fare poorly, since models are overconfident on out-of-domain inputs. Instead, we train a calibrator to identify inputs on which the QA model errs, and abstain when it predicts an error is likely. Crucially, the calibrator benefits from observing the model{'}s behavior on out-of-domain data, even if from a different domain than the test data. We combine this method with a SQuAD-trained QA model and evaluate on mixtures of SQuAD and five other QA datasets. Our method answers 56{\%} of questions while maintaining 80{\%} accuracy; in contrast, directly using the model{'}s probabilities only answers 48{\%} at 80{\%} accuracy.",
}
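The abstention policy described in the abstract reduces to a binary classifier over the QA model's outputs: predict whether the model's answer is correct, and abstain when an error is likely. A minimal sketch of that idea follows, assuming a random-forest calibrator and a hypothetical feature set (e.g., the model's softmax probabilities); the paper's exact calibrator, features, and training setup may differ.

```python
# Hedged sketch of calibrator-based selective QA. The calibrator model and
# features here are illustrative assumptions, not the paper's exact setup.
import numpy as np
from sklearn.ensemble import RandomForestClassifier

def train_calibrator(features, model_was_correct):
    """Fit a classifier predicting whether the QA model answered correctly.

    features: (n_examples, n_features) array of per-example signals, e.g.
        top softmax probabilities. Per the abstract, training on held-out
        in-domain plus *other*-domain examples helps under domain shift.
    model_was_correct: (n_examples,) array of 0/1 correctness labels.
    """
    calibrator = RandomForestClassifier(n_estimators=100, random_state=0)
    calibrator.fit(features, model_was_correct)
    return calibrator

def should_answer(calibrator, features, threshold=0.5):
    """Answer only where predicted P(correct) clears the threshold."""
    p_correct = calibrator.predict_proba(features)[:, 1]  # column 1 = P(label 1)
    return p_correct >= threshold
```

Raising the threshold trades coverage for accuracy, which is the knob the paper's evaluation sweeps.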
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kamath-etal-2020-selective">
<titleInfo>
<title>Selective Question Answering under Domain Shift</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amita</namePart>
<namePart type="family">Kamath</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robin</namePart>
<namePart type="family">Jia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Percy</namePart>
<namePart type="family">Liang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>To avoid giving wrong answers, question answering (QA) models need to know when to abstain from answering. Moreover, users often ask questions that diverge from the model’s training data, making errors more likely and thus abstention more critical. In this work, we propose the setting of selective question answering under domain shift, in which a QA model is tested on a mixture of in-domain and out-of-domain data, and must answer (i.e., not abstain on) as many questions as possible while maintaining high accuracy. Abstention policies based solely on the model’s softmax probabilities fare poorly, since models are overconfident on out-of-domain inputs. Instead, we train a calibrator to identify inputs on which the QA model errs, and abstain when it predicts an error is likely. Crucially, the calibrator benefits from observing the model’s behavior on out-of-domain data, even if from a different domain than the test data. We combine this method with a SQuAD-trained QA model and evaluate on mixtures of SQuAD and five other QA datasets. Our method answers 56% of questions while maintaining 80% accuracy; in contrast, directly using the model’s probabilities only answers 48% at 80% accuracy.</abstract>
<identifier type="citekey">kamath-etal-2020-selective</identifier>
<identifier type="doi">10.18653/v1/2020.acl-main.503</identifier>
<location>
<url>https://aclanthology.org/2020.acl-main.503</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>5684</start>
<end>5696</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Selective Question Answering under Domain Shift
%A Kamath, Amita
%A Jia, Robin
%A Liang, Percy
%S Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F kamath-etal-2020-selective
%X To avoid giving wrong answers, question answering (QA) models need to know when to abstain from answering. Moreover, users often ask questions that diverge from the model’s training data, making errors more likely and thus abstention more critical. In this work, we propose the setting of selective question answering under domain shift, in which a QA model is tested on a mixture of in-domain and out-of-domain data, and must answer (i.e., not abstain on) as many questions as possible while maintaining high accuracy. Abstention policies based solely on the model’s softmax probabilities fare poorly, since models are overconfident on out-of-domain inputs. Instead, we train a calibrator to identify inputs on which the QA model errs, and abstain when it predicts an error is likely. Crucially, the calibrator benefits from observing the model’s behavior on out-of-domain data, even if from a different domain than the test data. We combine this method with a SQuAD-trained QA model and evaluate on mixtures of SQuAD and five other QA datasets. Our method answers 56% of questions while maintaining 80% accuracy; in contrast, directly using the model’s probabilities only answers 48% at 80% accuracy.
%R 10.18653/v1/2020.acl-main.503
%U https://aclanthology.org/2020.acl-main.503
%U https://doi.org/10.18653/v1/2020.acl-main.503
%P 5684-5696
Markdown (Informal)
[Selective Question Answering under Domain Shift](https://aclanthology.org/2020.acl-main.503) (Kamath et al., ACL 2020)
ACL
Amita Kamath, Robin Jia, and Percy Liang. 2020. Selective Question Answering under Domain Shift. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pages 5684–5696, Online. Association for Computational Linguistics.
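The headline numbers (56% coverage at 80% accuracy for the calibrator versus 48% for raw softmax probabilities) come from a coverage-versus-accuracy trade-off: rank questions by confidence and answer the largest set that keeps accuracy at the target. Below is a sketch of that evaluation under a prefix-sweep formulation; whether the paper computes coverage exactly this way is an assumption.

```python
# Sketch of coverage-at-target-accuracy: answer questions in decreasing
# confidence order and report the largest answerable fraction whose
# accuracy stays at or above the target.
import numpy as np

def coverage_at_accuracy(confidence, correct, target=0.80):
    """Max fraction of questions answered with accuracy >= target.

    confidence: per-example scores (softmax probability or calibrator
        P(correct)); correct: 0/1 per-example correctness labels.
    """
    order = np.argsort(-np.asarray(confidence))       # most confident first
    correct_sorted = np.asarray(correct, dtype=float)[order]
    running_acc = np.cumsum(correct_sorted) / np.arange(1, len(order) + 1)
    ok = np.nonzero(running_acc >= target)[0]          # prefixes meeting target
    return 0.0 if len(ok) == 0 else (ok[-1] + 1) / len(order)
```

Comparing this quantity for raw softmax scores against calibrator scores reproduces the kind of contrast the abstract reports: a better-ordered confidence ranking answers more questions at the same accuracy.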