@inproceedings{wang-etal-2017-deep-neural,
title = "Deep Neural Machine Translation with Linear Associative Unit",
author = "Wang, Mingxuan and
Lu, Zhengdong and
Zhou, Jie and
Liu, Qun",
editor = "Barzilay, Regina and
Kan, Min-Yen",
booktitle = "Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P17-1013",
doi = "10.18653/v1/P17-1013",
pages = "136--145",
abstract = "Deep Neural Networks (DNNs) have provably enhanced the state-of-the-art Neural Machine Translation (NMT) with their capability in modeling complex functions and capturing complex linguistic structures. However, NMT with deep architecture in its encoder or decoder RNNs often suffers from severe gradient diffusion due to the non-linear recurrent activations, which often makes the optimization much more difficult. To address this problem, we propose a novel linear associative unit (LAU) to reduce the gradient propagation path inside the recurrent unit. Different from conventional approaches (LSTM unit and GRU), LAUs use linear associative connections between input and output of the recurrent unit, which allows unimpeded information flow through both space and time. The model is quite simple, but it is surprisingly effective. Our empirical study on Chinese-English translation shows that our model with proper configuration can improve by 11.7 BLEU upon Groundhog and the best reported results in the same setting. On the WMT14 English-German task and a larger WMT14 English-French task, our model achieves comparable results with the state-of-the-art.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2017-deep-neural">
<titleInfo>
<title>Deep Neural Machine Translation with Linear Associative Unit</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mingxuan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhengdong</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jie</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qun</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Regina</namePart>
<namePart type="family">Barzilay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Deep Neural Networks (DNNs) have provably enhanced the state-of-the-art Neural Machine Translation (NMT) with their capability in modeling complex functions and capturing complex linguistic structures. However, NMT with deep architecture in its encoder or decoder RNNs often suffers from severe gradient diffusion due to the non-linear recurrent activations, which often makes the optimization much more difficult. To address this problem, we propose a novel linear associative unit (LAU) to reduce the gradient propagation path inside the recurrent unit. Different from conventional approaches (LSTM unit and GRU), LAUs use linear associative connections between input and output of the recurrent unit, which allows unimpeded information flow through both space and time. The model is quite simple, but it is surprisingly effective. Our empirical study on Chinese-English translation shows that our model with proper configuration can improve by 11.7 BLEU upon Groundhog and the best reported results in the same setting. On the WMT14 English-German task and a larger WMT14 English-French task, our model achieves comparable results with the state-of-the-art.</abstract>
<identifier type="citekey">wang-etal-2017-deep-neural</identifier>
<identifier type="doi">10.18653/v1/P17-1013</identifier>
<location>
<url>https://aclanthology.org/P17-1013</url>
</location>
<part>
<date>2017-07</date>
<extent unit="page">
<start>136</start>
<end>145</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Deep Neural Machine Translation with Linear Associative Unit
%A Wang, Mingxuan
%A Lu, Zhengdong
%A Zhou, Jie
%A Liu, Qun
%Y Barzilay, Regina
%Y Kan, Min-Yen
%S Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2017
%8 July
%I Association for Computational Linguistics
%C Vancouver, Canada
%F wang-etal-2017-deep-neural
%X Deep Neural Networks (DNNs) have provably enhanced the state-of-the-art Neural Machine Translation (NMT) with their capability in modeling complex functions and capturing complex linguistic structures. However, NMT with deep architecture in its encoder or decoder RNNs often suffers from severe gradient diffusion due to the non-linear recurrent activations, which often makes the optimization much more difficult. To address this problem, we propose a novel linear associative unit (LAU) to reduce the gradient propagation path inside the recurrent unit. Different from conventional approaches (LSTM unit and GRU), LAUs use linear associative connections between input and output of the recurrent unit, which allows unimpeded information flow through both space and time. The model is quite simple, but it is surprisingly effective. Our empirical study on Chinese-English translation shows that our model with proper configuration can improve by 11.7 BLEU upon Groundhog and the best reported results in the same setting. On the WMT14 English-German task and a larger WMT14 English-French task, our model achieves comparable results with the state-of-the-art.
%R 10.18653/v1/P17-1013
%U https://aclanthology.org/P17-1013
%U https://doi.org/10.18653/v1/P17-1013
%P 136-145
Markdown (Informal)
[Deep Neural Machine Translation with Linear Associative Unit](https://aclanthology.org/P17-1013) (Wang et al., ACL 2017)
ACL
- Mingxuan Wang, Zhengdong Lu, Jie Zhou, and Qun Liu. 2017. Deep Neural Machine Translation with Linear Associative Unit. In Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 136–145, Vancouver, Canada. Association for Computational Linguistics.