@article{pruthi-etal-2022-evaluating,
    title = "Evaluating Explanations: How Much Do Explanations from the Teacher Aid Students?",
    author = "Pruthi, Danish and
      Bansal, Rachit and
      Dhingra, Bhuwan and
      Baldini Soares, Livio and
      Collins, Michael and
      Lipton, Zachary C. and
      Neubig, Graham and
      Cohen, William W.",
    editor = "Roark, Brian and
      Nenkova, Ani",
    journal = "Transactions of the Association for Computational Linguistics",
    volume = "10",
    year = "2022",
    address = "Cambridge, MA",
    publisher = "MIT Press",
    url = "https://aclanthology.org/2022.tacl-1.21",
    doi = "10.1162/tacl_a_00465",
    pages = "359--375",
    abstract = "While many methods purport to explain predictions by highlighting salient features, what aims these explanations serve and how they ought to be evaluated often go unstated. In this work, we introduce a framework to quantify the value of explanations via the accuracy gains that they confer on a student model trained to simulate a teacher model. Crucially, the explanations are available to the student during training, but are not available at test time. Compared with prior proposals, our approach is less easily gamed, enabling principled, automatic, model-agnostic evaluation of attributions. Using our framework, we compare numerous attribution methods for text classification and question answering, and observe quantitative differences that are consistent (to a moderate to high degree) across different student model architectures and learning strategies.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pruthi-etal-2022-evaluating">
    <titleInfo>
        <title>Evaluating Explanations: How Much Do Explanations from the Teacher Aid Students?</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Danish</namePart>
        <namePart type="family">Pruthi</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Rachit</namePart>
        <namePart type="family">Bansal</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Bhuwan</namePart>
        <namePart type="family">Dhingra</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Livio</namePart>
        <namePart type="family">Baldini Soares</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Michael</namePart>
        <namePart type="family">Collins</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Zachary</namePart>
        <namePart type="given">C</namePart>
        <namePart type="family">Lipton</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Graham</namePart>
        <namePart type="family">Neubig</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">William</namePart>
        <namePart type="given">W</namePart>
        <namePart type="family">Cohen</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2022</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <relatedItem type="host">
        <titleInfo>
            <title>Transactions of the Association for Computational Linguistics</title>
        </titleInfo>
        <originInfo>
            <issuance>continuing</issuance>
            <publisher>MIT Press</publisher>
            <place>
                <placeTerm type="text">Cambridge, MA</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">periodical</genre>
        <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>While many methods purport to explain predictions by highlighting salient features, what aims these explanations serve and how they ought to be evaluated often go unstated. In this work, we introduce a framework to quantify the value of explanations via the accuracy gains that they confer on a student model trained to simulate a teacher model. Crucially, the explanations are available to the student during training, but are not available at test time. Compared with prior proposals, our approach is less easily gamed, enabling principled, automatic, model-agnostic evaluation of attributions. Using our framework, we compare numerous attribution methods for text classification and question answering, and observe quantitative differences that are consistent (to a moderate to high degree) across different student model architectures and learning strategies.</abstract>
    <identifier type="citekey">pruthi-etal-2022-evaluating</identifier>
    <identifier type="doi">10.1162/tacl_a_00465</identifier>
    <location>
        <url>https://aclanthology.org/2022.tacl-1.21</url>
    </location>
    <part>
        <date>2022</date>
        <detail type="volume"><number>10</number></detail>
        <extent unit="page">
            <start>359</start>
            <end>375</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Journal Article
%T Evaluating Explanations: How Much Do Explanations from the Teacher Aid Students?
%A Pruthi, Danish
%A Bansal, Rachit
%A Dhingra, Bhuwan
%A Baldini Soares, Livio
%A Collins, Michael
%A Lipton, Zachary C.
%A Neubig, Graham
%A Cohen, William W.
%J Transactions of the Association for Computational Linguistics
%D 2022
%V 10
%I MIT Press
%C Cambridge, MA
%F pruthi-etal-2022-evaluating
%X While many methods purport to explain predictions by highlighting salient features, what aims these explanations serve and how they ought to be evaluated often go unstated. In this work, we introduce a framework to quantify the value of explanations via the accuracy gains that they confer on a student model trained to simulate a teacher model. Crucially, the explanations are available to the student during training, but are not available at test time. Compared with prior proposals, our approach is less easily gamed, enabling principled, automatic, model-agnostic evaluation of attributions. Using our framework, we compare numerous attribution methods for text classification and question answering, and observe quantitative differences that are consistent (to a moderate to high degree) across different student model architectures and learning strategies.
%R 10.1162/tacl_a_00465
%U https://aclanthology.org/2022.tacl-1.21
%U https://doi.org/10.1162/tacl_a_00465
%P 359-375
Markdown (Informal)
[Evaluating Explanations: How Much Do Explanations from the Teacher Aid Students?](https://aclanthology.org/2022.tacl-1.21) (Pruthi et al., TACL 2022)
ACL
Danish Pruthi, Rachit Bansal, Bhuwan Dhingra, Livio Baldini Soares, Michael Collins, Zachary C. Lipton, Graham Neubig, and William W. Cohen. 2022. Evaluating Explanations: How Much Do Explanations from the Teacher Aid Students?. Transactions of the Association for Computational Linguistics, 10:359–375.