@inproceedings{skrlj-etal-2021-exploring,
title = "Exploring Neural Language Models via Analysis of Local and Global Self-Attention Spaces",
author = "{\v{S}}krlj, Bla{\v{z}} and
Sheehan, Shane and
Er{\v{z}}en, Nika and
Robnik-{\v{S}}ikonja, Marko and
Luz, Saturnino and
Pollak, Senja",
editor = "Toivonen, Hannu and
Boggia, Michele",
booktitle = "Proceedings of the EACL Hackashop on News Media Content Analysis and Automated Report Generation",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.hackashop-1.11",
pages = "76--83",
abstract = "Large pretrained language models using the transformer neural network architecture are becoming a dominant methodology for many natural language processing tasks, such as question answering, text classification, word sense disambiguation, text completion and machine translation. Commonly comprising hundreds of millions of parameters, these models offer state-of-the-art performance, but at the expense of interpretability. The attention mechanism is the main component of transformer networks. We present AttViz, a method for exploration of self-attention in transformer networks, which can help in explanation and debugging of the trained models by showing associations between text tokens in an input sequence. We show that existing deep learning pipelines can be explored with AttViz, which offers novel visualizations of the attention heads and their aggregations. We implemented the proposed methods in an online toolkit and an offline library. Using examples from news analysis, we demonstrate how AttViz can be used to inspect and potentially better understand what a model has learned.",
}