@inproceedings{ronnqvist-etal-2022-explaining,
title = "Explaining Classes through Stable Word Attributions",
author = {R{\"o}nnqvist, Samuel and
Kyr{\"o}l{\"a}inen, Aki-Juhani and
Myntti, Amanda and
Ginter, Filip and
Laippala, Veronika},
booktitle = "Findings of the Association for Computational Linguistics: ACL 2022",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-acl.85",
doi = "10.18653/v1/2022.findings-acl.85",
pages = "1063--1074",
abstract = "Input saliency methods have recently become a popular tool for explaining predictions of deep learning models in NLP. Nevertheless, there has been little work investigating methods for aggregating prediction-level explanations to the class level, nor has a framework for evaluating such class explanations been established. We explore explanations based on XLM-R and the Integrated Gradients input attribution method, and propose 1) the Stable Attribution Class Explanation method (SACX) to extract keyword lists of classes in text classification tasks, and 2) a framework for the systematic evaluation of the keyword lists. We find that explanations of individual predictions are prone to noise, but that stable explanations can be effectively identified through repeated training and explanation. We evaluate on web register data and show that the class explanations are linguistically meaningful and distinguishing of the classes.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ronnqvist-etal-2022-explaining">
<titleInfo>
<title>Explaining Classes through Stable Word Attributions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Samuel</namePart>
<namePart type="family">Rönnqvist</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aki-Juhani</namePart>
<namePart type="family">Kyröläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanda</namePart>
<namePart type="family">Myntti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Filip</namePart>
<namePart type="family">Ginter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronika</namePart>
<namePart type="family">Laippala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2022</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Input saliency methods have recently become a popular tool for explaining predictions of deep learning models in NLP. Nevertheless, there has been little work investigating methods for aggregating prediction-level explanations to the class level, nor has a framework for evaluating such class explanations been established. We explore explanations based on XLM-R and the Integrated Gradients input attribution method, and propose 1) the Stable Attribution Class Explanation method (SACX) to extract keyword lists of classes in text classification tasks, and 2) a framework for the systematic evaluation of the keyword lists. We find that explanations of individual predictions are prone to noise, but that stable explanations can be effectively identified through repeated training and explanation. We evaluate on web register data and show that the class explanations are linguistically meaningful and distinguishing of the classes.</abstract>
<identifier type="citekey">ronnqvist-etal-2022-explaining</identifier>
<identifier type="doi">10.18653/v1/2022.findings-acl.85</identifier>
<location>
<url>https://aclanthology.org/2022.findings-acl.85</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>1063</start>
<end>1074</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Explaining Classes through Stable Word Attributions
%A Rönnqvist, Samuel
%A Kyröläinen, Aki-Juhani
%A Myntti, Amanda
%A Ginter, Filip
%A Laippala, Veronika
%S Findings of the Association for Computational Linguistics: ACL 2022
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F ronnqvist-etal-2022-explaining
%X Input saliency methods have recently become a popular tool for explaining predictions of deep learning models in NLP. Nevertheless, there has been little work investigating methods for aggregating prediction-level explanations to the class level, nor has a framework for evaluating such class explanations been established. We explore explanations based on XLM-R and the Integrated Gradients input attribution method, and propose 1) the Stable Attribution Class Explanation method (SACX) to extract keyword lists of classes in text classification tasks, and 2) a framework for the systematic evaluation of the keyword lists. We find that explanations of individual predictions are prone to noise, but that stable explanations can be effectively identified through repeated training and explanation. We evaluate on web register data and show that the class explanations are linguistically meaningful and distinguishing of the classes.
%R 10.18653/v1/2022.findings-acl.85
%U https://aclanthology.org/2022.findings-acl.85
%U https://doi.org/10.18653/v1/2022.findings-acl.85
%P 1063-1074
Markdown (Informal)
[Explaining Classes through Stable Word Attributions](https://aclanthology.org/2022.findings-acl.85) (Rönnqvist et al., Findings 2022)
ACL
- Samuel Rönnqvist, Aki-Juhani Kyröläinen, Amanda Myntti, Filip Ginter, and Veronika Laippala. 2022. Explaining Classes through Stable Word Attributions. In Findings of the Association for Computational Linguistics: ACL 2022, pages 1063–1074, Dublin, Ireland. Association for Computational Linguistics.