% Conference paper in the RANLP 2019 proceedings (ACL Anthology ID R19-1028).
@inproceedings{deguchi-etal-2019-dependency,
  title     = {Dependency-Based Self-Attention for {Transformer} {NMT}},
  author    = {Deguchi, Hiroyuki and
               Tamura, Akihiro and
               Ninomiya, Takashi},
  editor    = {Mitkov, Ruslan and
               Angelova, Galia},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing ({RANLP} 2019)},
  month     = sep,
  year      = {2019},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  url       = {https://aclanthology.org/R19-1028/},
  doi       = {10.26615/978-954-452-056-4_028},
  pages     = {239--246},
  abstract  = {In this paper, we propose a new Transformer neural machine translation (NMT) model that incorporates dependency relations into self-attention on both source and target sides, dependency-based self-attention. The dependency-based self-attention is trained to attend to the modifiee for each token under constraints based on the dependency relations, inspired by Linguistically-Informed Self-Attention (LISA). While LISA is originally proposed for Transformer encoder for semantic role labeling, this paper extends LISA to Transformer NMT by masking future information on words in the decoder-side dependency-based self-attention. Additionally, our dependency-based self-attention operates at sub-word units created by byte pair encoding. The experiments show that our model improves 1.0 BLEU points over the baseline model on the WAT'18 Asian Scientific Paper Excerpt Corpus Japanese-to-English translation task.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for a single conference paper. The paper's own metadata sits
     directly under <mods>; the proceedings volume it appeared in (title,
     editors, publisher, place) is described by <relatedItem type="host">. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="deguchi-etal-2019-dependency">
    <titleInfo>
      <title>Dependency-Based Self-Attention for Transformer NMT</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Hiroyuki</namePart>
      <namePart type="family">Deguchi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Akihiro</namePart>
      <namePart type="family">Tamura</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Takashi</namePart>
      <namePart type="family">Ninomiya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume and its editors. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we propose a new Transformer neural machine translation (NMT) model that incorporates dependency relations into self-attention on both source and target sides, dependency-based self-attention. The dependency-based self-attention is trained to attend to the modifiee for each token under constraints based on the dependency relations, inspired by Linguistically-Informed Self-Attention (LISA). While LISA is originally proposed for Transformer encoder for semantic role labeling, this paper extends LISA to Transformer NMT by masking future information on words in the decoder-side dependency-based self-attention. Additionally, our dependency-based self-attention operates at sub-word units created by byte pair encoding. The experiments show that our model improves 1.0 BLEU points over the baseline model on the WAT’18 Asian Scientific Paper Excerpt Corpus Japanese-to-English translation task.</abstract>
    <identifier type="citekey">deguchi-etal-2019-dependency</identifier>
    <identifier type="doi">10.26615/978-954-452-056-4_028</identifier>
    <location>
      <url>https://aclanthology.org/R19-1028/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2019-09</date>
      <extent unit="page">
        <start>239</start>
        <end>246</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Dependency-Based Self-Attention for Transformer NMT
%A Deguchi, Hiroyuki
%A Tamura, Akihiro
%A Ninomiya, Takashi
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)
%D 2019
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F deguchi-etal-2019-dependency
%X In this paper, we propose a new Transformer neural machine translation (NMT) model that incorporates dependency relations into self-attention on both source and target sides, dependency-based self-attention. The dependency-based self-attention is trained to attend to the modifiee for each token under constraints based on the dependency relations, inspired by Linguistically-Informed Self-Attention (LISA). While LISA is originally proposed for Transformer encoder for semantic role labeling, this paper extends LISA to Transformer NMT by masking future information on words in the decoder-side dependency-based self-attention. Additionally, our dependency-based self-attention operates at sub-word units created by byte pair encoding. The experiments show that our model improves 1.0 BLEU points over the baseline model on the WAT’18 Asian Scientific Paper Excerpt Corpus Japanese-to-English translation task.
%R 10.26615/978-954-452-056-4_028
%U https://aclanthology.org/R19-1028/
%U https://doi.org/10.26615/978-954-452-056-4_028
%P 239-246
Markdown (Informal)
[Dependency-Based Self-Attention for Transformer NMT](https://aclanthology.org/R19-1028/) (Deguchi et al., RANLP 2019)
ACL
- Hiroyuki Deguchi, Akihiro Tamura, and Takashi Ninomiya. 2019. Dependency-Based Self-Attention for Transformer NMT. In Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019), pages 239–246, Varna, Bulgaria. INCOMA Ltd.