@inproceedings{yang-etal-2023-many,
title = "How Many and Which Training Points Would Need to be Removed to Flip this Prediction?",
author = "Yang, Jinghan and
Jain, Sarthak and
Wallace, Byron C.",
editor = "Vlachos, Andreas and
Augenstein, Isabelle",
booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.eacl-main.188",
doi = "10.18653/v1/2023.eacl-main.188",
pages = "2571--2584",
abstract = "We consider the problem of identifying a \textit{minimal subset} of training data $\mathcal{S}_t$ such that if the instances comprising $\mathcal{S}_t$ had been removed prior to training, the categorization of a given test point $x_t$ would have been different. Identifying such a set may be of interest for a few reasons. First, the cardinality of $\mathcal{S}_t$ provides a measure of robustness (if $|\mathcal{S}_t|$ is small for $x_t$, we might be less confident in the corresponding prediction), which we show is correlated with but complementary to predicted probabilities. Second, interrogation of $\mathcal{S}_t$ may provide a novel mechanism for \textit{contesting} a particular model prediction: If one can make the case that the points in $\mathcal{S}_t$ are wrongly labeled or irrelevant, this may argue for overturning the associated prediction. Identifying $\mathcal{S}_t$ via brute-force is intractable. We propose comparatively fast approximation methods to find $\mathcal{S}_t$ based on \textit{influence functions}, and find that{---}for simple convex text classification models{---}these approaches can often successfully identify relatively small sets of training examples which, if removed, would flip the prediction.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yang-etal-2023-many">
<titleInfo>
<title>How Many and Which Training Points Would Need to be Removed to Flip this Prediction?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jinghan</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarthak</namePart>
<namePart type="family">Jain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Byron</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Wallace</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We consider the problem of identifying a minimal subset of training data S_t such that if the instances comprising S_t had been removed prior to training, the categorization of a given test point x_t would have been different. Identifying such a set may be of interest for a few reasons. First, the cardinality of S_t provides a measure of robustness (if |S_t| is small for x_t, we might be less confident in the corresponding prediction), which we show is correlated with but complementary to predicted probabilities. Second, interrogation of S_t may provide a novel mechanism for contesting a particular model prediction: If one can make the case that the points in S_t are wrongly labeled or irrelevant, this may argue for overturning the associated prediction. Identifying S_t via brute-force is intractable. We propose comparatively fast approximation methods to find S_t based on influence functions, and find that—for simple convex text classification models—these approaches can often successfully identify relatively small sets of training examples which, if removed, would flip the prediction.</abstract>
<identifier type="citekey">yang-etal-2023-many</identifier>
<identifier type="doi">10.18653/v1/2023.eacl-main.188</identifier>
<location>
<url>https://aclanthology.org/2023.eacl-main.188</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>2571</start>
<end>2584</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How Many and Which Training Points Would Need to be Removed to Flip this Prediction?
%A Yang, Jinghan
%A Jain, Sarthak
%A Wallace, Byron C.
%Y Vlachos, Andreas
%Y Augenstein, Isabelle
%S Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F yang-etal-2023-many
%X We consider the problem of identifying a minimal subset of training data S_t such that if the instances comprising S_t had been removed prior to training, the categorization of a given test point x_t would have been different. Identifying such a set may be of interest for a few reasons. First, the cardinality of S_t provides a measure of robustness (if |S_t| is small for x_t, we might be less confident in the corresponding prediction), which we show is correlated with but complementary to predicted probabilities. Second, interrogation of S_t may provide a novel mechanism for contesting a particular model prediction: If one can make the case that the points in S_t are wrongly labeled or irrelevant, this may argue for overturning the associated prediction. Identifying S_t via brute-force is intractable. We propose comparatively fast approximation methods to find S_t based on influence functions, and find that—for simple convex text classification models—these approaches can often successfully identify relatively small sets of training examples which, if removed, would flip the prediction.
%R 10.18653/v1/2023.eacl-main.188
%U https://aclanthology.org/2023.eacl-main.188
%U https://doi.org/10.18653/v1/2023.eacl-main.188
%P 2571-2584
Markdown (Informal)
[How Many and Which Training Points Would Need to be Removed to Flip this Prediction?](https://aclanthology.org/2023.eacl-main.188) (Yang et al., EACL 2023)
ACL