@inproceedings{rosendahl-etal-2021-recurrent,
title = "Recurrent Attention for the Transformer",
author = "Rosendahl, Jan and
Herold, Christian and
Petrick, Frithjof and
Ney, Hermann",
editor = "Sedoc, Jo{\~a}o and
Rogers, Anna and
Rumshisky, Anna and
Tafreshi, Shabnam",
booktitle = "Proceedings of the Second Workshop on Insights from Negative Results in NLP",
month = nov,
year = "2021",
address = "Online and Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.insights-1.10",
doi = "10.18653/v1/2021.insights-1.10",
pages = "62--66",
abstract = "In this work, we conduct a comprehensive investigation on one of the centerpieces of modern machine translation systems: the encoder-decoder attention mechanism. Motivated by the concept of first-order alignments, we extend the (cross-)attention mechanism by a recurrent connection, allowing direct access to previous attention/alignment decisions. We propose several ways to include such a recurrency into the attention mechanism. Verifying their performance across different translation tasks we conclude that these extensions and dependencies are not beneficial for the translation performance of the Transformer architecture.",
}
Markdown (informal):
[Recurrent Attention for the Transformer](https://aclanthology.org/2021.insights-1.10) (Rosendahl et al., insights 2021)

ACL:
Jan Rosendahl, Christian Herold, Frithjof Petrick, and Hermann Ney. 2021. Recurrent Attention for the Transformer. In Proceedings of the Second Workshop on Insights from Negative Results in NLP, pages 62–66, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.
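The abstract describes extending the Transformer's encoder-decoder (cross-)attention with a recurrent connection, so that each decoding step has direct access to the previous step's attention/alignment decisions. A minimal sketch of such a first-order dependency in PyTorch follows; the module name, the `prev_gate` parameter, and the single-head, unbatched setup are illustrative assumptions, not the paper's actual implementation:

```python
# Illustrative sketch only: one way to give cross-attention a recurrent
# connection to the previous step's attention weights, as the abstract
# describes. Names (RecurrentCrossAttention, prev_gate) are hypothetical.
import torch
import torch.nn as nn
import torch.nn.functional as F

class RecurrentCrossAttention(nn.Module):
    def __init__(self, d_model: int):
        super().__init__()
        self.q = nn.Linear(d_model, d_model)
        self.k = nn.Linear(d_model, d_model)
        self.v = nn.Linear(d_model, d_model)
        # Scalar gate mixing the previous attention distribution into the
        # current step's logits (the "first-order" dependency).
        self.prev_gate = nn.Parameter(torch.tensor(1.0))
        self.scale = d_model ** -0.5

    def forward(self, tgt: torch.Tensor, memory: torch.Tensor) -> torch.Tensor:
        # tgt: (T, d_model) decoder states; memory: (S, d_model) encoder states.
        q, k, v = self.q(tgt), self.k(memory), self.v(memory)
        logits = (q @ k.t()) * self.scale            # (T, S)
        prev_alpha = torch.zeros(memory.size(0))     # no history at step 0
        outputs = []
        for i in range(tgt.size(0)):                 # step-wise over target
            step_logits = logits[i] + self.prev_gate * prev_alpha
            alpha = F.softmax(step_logits, dim=-1)   # attention over source
            outputs.append(alpha @ v)                # context vector
            prev_alpha = alpha
        return torch.stack(outputs)                  # (T, d_model)

# Example: attend over a 7-token source from a 5-token target prefix.
attn = RecurrentCrossAttention(d_model=16)
out = attn(torch.randn(5, 16), torch.randn(7, 16))
print(out.shape)  # torch.Size([5, 16])
```

Note that the step-wise loop makes this variant inherently sequential over target positions, unlike standard cross-attention; and per the paper's negative result, such extensions did not improve translation performance across the tasks the authors tested.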