@inproceedings{sun-lu-2020-understanding,
title = "Understanding Attention for Text Classification",
author = "Sun, Xiaobing and
Lu, Wei",
editor = "Jurafsky, Dan and
Chai, Joyce and
Schluter, Natalie and
Tetreault, Joel",
booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.acl-main.312",
doi = "10.18653/v1/2020.acl-main.312",
pages = "3418--3428",
abstract = "Attention has been proven successful in many natural language processing (NLP) tasks. Recently, many researchers started to investigate the interpretability of attention on NLP tasks. Many existing approaches focused on examining whether the local attention weights could reflect the importance of input representations. In this work, we present a study on understanding the internal mechanism of attention by looking into the gradient update process, checking its behavior when approaching a local minimum during training. We propose to analyze for each word token the following two quantities: its polarity score and its attention score, where the latter is a global assessment on the token{'}s significance. We discuss conditions under which the attention mechanism may become more (or less) interpretable, and show how the interplay between the two quantities can contribute towards model performance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sun-lu-2020-understanding">
<titleInfo>
<title>Understanding Attention for Text Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiaobing</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Jurafsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Chai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalie</namePart>
<namePart type="family">Schluter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Tetreault</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Attention has been proven successful in many natural language processing (NLP) tasks. Recently, many researchers started to investigate the interpretability of attention on NLP tasks. Many existing approaches focused on examining whether the local attention weights could reflect the importance of input representations. In this work, we present a study on understanding the internal mechanism of attention by looking into the gradient update process, checking its behavior when approaching a local minimum during training. We propose to analyze for each word token the following two quantities: its polarity score and its attention score, where the latter is a global assessment on the token’s significance. We discuss conditions under which the attention mechanism may become more (or less) interpretable, and show how the interplay between the two quantities can contribute towards model performance.</abstract>
<identifier type="citekey">sun-lu-2020-understanding</identifier>
<identifier type="doi">10.18653/v1/2020.acl-main.312</identifier>
<location>
<url>https://aclanthology.org/2020.acl-main.312</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>3418</start>
<end>3428</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Understanding Attention for Text Classification
%A Sun, Xiaobing
%A Lu, Wei
%Y Jurafsky, Dan
%Y Chai, Joyce
%Y Schluter, Natalie
%Y Tetreault, Joel
%S Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F sun-lu-2020-understanding
%X Attention has been proven successful in many natural language processing (NLP) tasks. Recently, many researchers started to investigate the interpretability of attention on NLP tasks. Many existing approaches focused on examining whether the local attention weights could reflect the importance of input representations. In this work, we present a study on understanding the internal mechanism of attention by looking into the gradient update process, checking its behavior when approaching a local minimum during training. We propose to analyze for each word token the following two quantities: its polarity score and its attention score, where the latter is a global assessment on the token’s significance. We discuss conditions under which the attention mechanism may become more (or less) interpretable, and show how the interplay between the two quantities can contribute towards model performance.
%R 10.18653/v1/2020.acl-main.312
%U https://aclanthology.org/2020.acl-main.312
%U https://doi.org/10.18653/v1/2020.acl-main.312
%P 3418-3428
Markdown (Informal)
[Understanding Attention for Text Classification](https://aclanthology.org/2020.acl-main.312) (Sun & Lu, ACL 2020)
ACL
- Xiaobing Sun and Wei Lu. 2020. Understanding Attention for Text Classification. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pages 3418–3428, Online. Association for Computational Linguistics.