@inproceedings{rajagopal-etal-2021-selfexplain,
title = "{SELFEXPLAIN}: A Self-Explaining Architecture for Neural Text Classifiers",
author = "Rajagopal, Dheeraj and
Balachandran, Vidhisha and
Hovy, Eduard H and
Tsvetkov, Yulia",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2021",
address = "Online and Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.emnlp-main.64",
doi = "10.18653/v1/2021.emnlp-main.64",
pages = "836--850",
abstract = "We introduce SelfExplain, a novel self-explaining model that explains a text classifier{'}s predictions using phrase-based concepts. SelfExplain augments existing neural classifiers by adding (1) a globally interpretable layer that identifies the most influential concepts in the training set for a given sample and (2) a locally interpretable layer that quantifies the contribution of each local input concept by computing a relevance score relative to the predicted label. Experiments across five text-classification datasets show that SelfExplain facilitates interpretability without sacrificing performance. Most importantly, explanations from SelfExplain show sufficiency for model predictions and are perceived as adequate, trustworthy and understandable by human judges compared to existing widely-used baselines.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rajagopal-etal-2021-selfexplain">
<titleInfo>
<title>SELFEXPLAIN: A Self-Explaining Architecture for Neural Text Classifiers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dheeraj</namePart>
<namePart type="family">Rajagopal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vidhisha</namePart>
<namePart type="family">Balachandran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eduard</namePart>
<namePart type="given">H</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulia</namePart>
<namePart type="family">Tsvetkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="given">Wen-tau</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online and Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce SelfExplain, a novel self-explaining model that explains a text classifier’s predictions using phrase-based concepts. SelfExplain augments existing neural classifiers by adding (1) a globally interpretable layer that identifies the most influential concepts in the training set for a given sample and (2) a locally interpretable layer that quantifies the contribution of each local input concept by computing a relevance score relative to the predicted label. Experiments across five text-classification datasets show that SelfExplain facilitates interpretability without sacrificing performance. Most importantly, explanations from SelfExplain show sufficiency for model predictions and are perceived as adequate, trustworthy and understandable by human judges compared to existing widely-used baselines.</abstract>
<identifier type="citekey">rajagopal-etal-2021-selfexplain</identifier>
<identifier type="doi">10.18653/v1/2021.emnlp-main.64</identifier>
<location>
<url>https://aclanthology.org/2021.emnlp-main.64</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>836</start>
<end>850</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SELFEXPLAIN: A Self-Explaining Architecture for Neural Text Classifiers
%A Rajagopal, Dheeraj
%A Balachandran, Vidhisha
%A Hovy, Eduard H.
%A Tsvetkov, Yulia
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online and Punta Cana, Dominican Republic
%F rajagopal-etal-2021-selfexplain
%X We introduce SelfExplain, a novel self-explaining model that explains a text classifier’s predictions using phrase-based concepts. SelfExplain augments existing neural classifiers by adding (1) a globally interpretable layer that identifies the most influential concepts in the training set for a given sample and (2) a locally interpretable layer that quantifies the contribution of each local input concept by computing a relevance score relative to the predicted label. Experiments across five text-classification datasets show that SelfExplain facilitates interpretability without sacrificing performance. Most importantly, explanations from SelfExplain show sufficiency for model predictions and are perceived as adequate, trustworthy and understandable by human judges compared to existing widely-used baselines.
%R 10.18653/v1/2021.emnlp-main.64
%U https://aclanthology.org/2021.emnlp-main.64
%U https://doi.org/10.18653/v1/2021.emnlp-main.64
%P 836-850
Markdown (Informal)
[SELFEXPLAIN: A Self-Explaining Architecture for Neural Text Classifiers](https://aclanthology.org/2021.emnlp-main.64) (Rajagopal et al., EMNLP 2021)
ACL