@inproceedings{bashier-etal-2022-locally,
title = "Locally Distributed Activation Vectors for Guided Feature Attribution",
author = "Bashier, Housam K. B. and
Kim, Mi-Young and
Goebel, Randy",
editor = "Calzolari, Nicoletta and
Huang, Chu-Ren and
Kim, Hansaem and
Pustejovsky, James and
Wanner, Leo and
Choi, Key-Sun and
Ryu, Pum-Mo and
Chen, Hsin-Hsi and
Donatelli, Lucia and
Ji, Heng and
Kurohashi, Sadao and
Paggio, Patrizia and
Xue, Nianwen and
Kim, Seokhwan and
Hahm, Younggyun and
He, Zhong and
Lee, Tony Kyungil and
Santus, Enrico and
Bond, Francis and
Na, Seung-Hoon",
booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2022.coling-1.83",
pages = "994--1005",
abstract = "Explaining the predictions of a deep neural network (DNN) is a challenging problem. Many attempts at interpreting those predictions have focused on attribution-based methods, which assess the contributions of individual features to each model prediction. However, attribution-based explanations do not always provide faithful explanations to the target model, e.g., noisy gradients can result in unfaithful feature attribution for back-propagation methods. We present a method to learn explanations-specific representations while constructing deep network models for text classification. These representations can be used to faithfully interpret black-box predictions, i.e., highlighting the most important input features and their role in any particular prediction. We show that learning specific representations improves model interpretability across various tasks, for both qualitative and quantitative evaluations, while preserving predictive performance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bashier-etal-2022-locally">
<titleInfo>
<title>Locally Distributed Activation Vectors for Guided Feature Attribution</title>
</titleInfo>
<name type="personal">
<namePart type="given">Housam</namePart>
<namePart type="given">K</namePart>
<namePart type="given">B</namePart>
<namePart type="family">Bashier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mi-Young</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Randy</namePart>
<namePart type="family">Goebel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chu-Ren</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hansaem</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Key-Sun</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pum-Mo</namePart>
<namePart type="family">Ryu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Hsi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Donatelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sadao</namePart>
<namePart type="family">Kurohashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrizia</namePart>
<namePart type="family">Paggio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seokhwan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Younggyun</namePart>
<namePart type="family">Hahm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhong</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tony</namePart>
<namePart type="given">Kyungil</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrico</namePart>
<namePart type="family">Santus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Bond</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seung-Hoon</namePart>
<namePart type="family">Na</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Explaining the predictions of a deep neural network (DNN) is a challenging problem. Many attempts at interpreting those predictions have focused on attribution-based methods, which assess the contributions of individual features to each model prediction. However, attribution-based explanations do not always provide faithful explanations to the target model, e.g., noisy gradients can result in unfaithful feature attribution for back-propagation methods. We present a method to learn explanations-specific representations while constructing deep network models for text classification. These representations can be used to faithfully interpret black-box predictions, i.e., highlighting the most important input features and their role in any particular prediction. We show that learning specific representations improves model interpretability across various tasks, for both qualitative and quantitative evaluations, while preserving predictive performance.</abstract>
<identifier type="citekey">bashier-etal-2022-locally</identifier>
<location>
<url>https://aclanthology.org/2022.coling-1.83</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>994</start>
<end>1005</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Locally Distributed Activation Vectors for Guided Feature Attribution
%A Bashier, Housam K. B.
%A Kim, Mi-Young
%A Goebel, Randy
%Y Calzolari, Nicoletta
%Y Huang, Chu-Ren
%Y Kim, Hansaem
%Y Pustejovsky, James
%Y Wanner, Leo
%Y Choi, Key-Sun
%Y Ryu, Pum-Mo
%Y Chen, Hsin-Hsi
%Y Donatelli, Lucia
%Y Ji, Heng
%Y Kurohashi, Sadao
%Y Paggio, Patrizia
%Y Xue, Nianwen
%Y Kim, Seokhwan
%Y Hahm, Younggyun
%Y He, Zhong
%Y Lee, Tony Kyungil
%Y Santus, Enrico
%Y Bond, Francis
%Y Na, Seung-Hoon
%S Proceedings of the 29th International Conference on Computational Linguistics
%D 2022
%8 October
%I International Committee on Computational Linguistics
%C Gyeongju, Republic of Korea
%F bashier-etal-2022-locally
%X Explaining the predictions of a deep neural network (DNN) is a challenging problem. Many attempts at interpreting those predictions have focused on attribution-based methods, which assess the contributions of individual features to each model prediction. However, attribution-based explanations do not always provide faithful explanations to the target model, e.g., noisy gradients can result in unfaithful feature attribution for back-propagation methods. We present a method to learn explanations-specific representations while constructing deep network models for text classification. These representations can be used to faithfully interpret black-box predictions, i.e., highlighting the most important input features and their role in any particular prediction. We show that learning specific representations improves model interpretability across various tasks, for both qualitative and quantitative evaluations, while preserving predictive performance.
%U https://aclanthology.org/2022.coling-1.83
%P 994-1005
Markdown (Informal)
[Locally Distributed Activation Vectors for Guided Feature Attribution](https://aclanthology.org/2022.coling-1.83) (Bashier et al., COLING 2022)
ACL