@inproceedings{tutek-snajder-2020-staying,
title = "Staying True to Your Word: (How) Can Attention Become Explanation?",
author = "Tutek, Martin and
Snajder, Jan",
editor = "Gella, Spandana and
Welbl, Johannes and
Rei, Marek and
Petroni, Fabio and
Lewis, Patrick and
Strubell, Emma and
Seo, Minjoon and
Hajishirzi, Hannaneh",
booktitle = "Proceedings of the 5th Workshop on Representation Learning for NLP",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.repl4nlp-1.17",
doi = "10.18653/v1/2020.repl4nlp-1.17",
pages = "131--142",
abstract = "The attention mechanism has quickly become ubiquitous in NLP. In addition to improving performance of models, attention has been widely used as a glimpse into the inner workings of NLP models. The latter aspect has in the recent years become a common topic of discussion, most notably in recent work of Jain and Wallace; Wiegreffe and Pinter. With the shortcomings of using attention weights as a tool of transparency revealed, the attention mechanism has been stuck in a limbo without concrete proof when and whether it can be used as an explanation. In this paper, we provide an explanation as to why attention has seen rightful critique when used with recurrent networks in sequence classification tasks. We propose a remedy to these issues in the form of a word level objective and our findings give credibility for attention to provide faithful interpretations of recurrent models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tutek-snajder-2020-staying">
<titleInfo>
<title>Staying True to Your Word: (How) Can Attention Become Explanation?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Tutek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Snajder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Representation Learning for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Spandana</namePart>
<namePart type="family">Gella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johannes</namePart>
<namePart type="family">Welbl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marek</namePart>
<namePart type="family">Rei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabio</namePart>
<namePart type="family">Petroni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrick</namePart>
<namePart type="family">Lewis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emma</namePart>
<namePart type="family">Strubell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minjoon</namePart>
<namePart type="family">Seo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hannaneh</namePart>
<namePart type="family">Hajishirzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The attention mechanism has quickly become ubiquitous in NLP. In addition to improving performance of models, attention has been widely used as a glimpse into the inner workings of NLP models. The latter aspect has in the recent years become a common topic of discussion, most notably in recent work of Jain and Wallace; Wiegreffe and Pinter. With the shortcomings of using attention weights as a tool of transparency revealed, the attention mechanism has been stuck in a limbo without concrete proof when and whether it can be used as an explanation. In this paper, we provide an explanation as to why attention has seen rightful critique when used with recurrent networks in sequence classification tasks. We propose a remedy to these issues in the form of a word level objective and our findings give credibility for attention to provide faithful interpretations of recurrent models.</abstract>
<identifier type="citekey">tutek-snajder-2020-staying</identifier>
<identifier type="doi">10.18653/v1/2020.repl4nlp-1.17</identifier>
<location>
<url>https://aclanthology.org/2020.repl4nlp-1.17</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>131</start>
<end>142</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Staying True to Your Word: (How) Can Attention Become Explanation?
%A Tutek, Martin
%A Snajder, Jan
%Y Gella, Spandana
%Y Welbl, Johannes
%Y Rei, Marek
%Y Petroni, Fabio
%Y Lewis, Patrick
%Y Strubell, Emma
%Y Seo, Minjoon
%Y Hajishirzi, Hannaneh
%S Proceedings of the 5th Workshop on Representation Learning for NLP
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F tutek-snajder-2020-staying
%X The attention mechanism has quickly become ubiquitous in NLP. In addition to improving performance of models, attention has been widely used as a glimpse into the inner workings of NLP models. The latter aspect has in the recent years become a common topic of discussion, most notably in recent work of Jain and Wallace; Wiegreffe and Pinter. With the shortcomings of using attention weights as a tool of transparency revealed, the attention mechanism has been stuck in a limbo without concrete proof when and whether it can be used as an explanation. In this paper, we provide an explanation as to why attention has seen rightful critique when used with recurrent networks in sequence classification tasks. We propose a remedy to these issues in the form of a word level objective and our findings give credibility for attention to provide faithful interpretations of recurrent models.
%R 10.18653/v1/2020.repl4nlp-1.17
%U https://aclanthology.org/2020.repl4nlp-1.17
%U https://doi.org/10.18653/v1/2020.repl4nlp-1.17
%P 131-142
Markdown (Informal)
[Staying True to Your Word: (How) Can Attention Become Explanation?](https://aclanthology.org/2020.repl4nlp-1.17) (Tutek & Snajder, RepL4NLP 2020)
ACL