@inproceedings{gulcehre-etal-2017-plan,
title = "Plan, Attend, Generate: Character-Level Neural Machine Translation with Planning",
author = "Gulcehre, Caglar and
Dutil, Francis and
Trischler, Adam and
Bengio, Yoshua",
booktitle = "Proceedings of the 2nd Workshop on Representation Learning for {NLP}",
month = aug,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-2627",
doi = "10.18653/v1/W17-2627",
pages = "228--234",
abstract = "We investigate the integration of a planning mechanism into an encoder-decoder architecture with attention. We develop a model that can plan ahead when it computes alignments between the source and target sequences not only for a single time-step but for the next k time-steps as well by constructing a matrix of proposed future alignments and a commitment vector that governs whether to follow or recompute the plan. This mechanism is inspired by strategic attentive reader and writer (STRAW) model, a recent neural architecture for planning with hierarchical reinforcement learning that can also learn higher level temporal abstractions. Our proposed model is end-to-end trainable with differentiable operations. We show that our model outperforms strong baselines on character-level translation task from WMT{'}15 with fewer parameters and computes alignments that are qualitatively intuitive.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gulcehre-etal-2017-plan">
<titleInfo>
<title>Plan, Attend, Generate: Character-Level Neural Machine Translation with Planning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Caglar</namePart>
<namePart type="family">Gulcehre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Dutil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Trischler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoshua</namePart>
<namePart type="family">Bengio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Representation Learning for NLP</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We investigate the integration of a planning mechanism into an encoder-decoder architecture with attention. We develop a model that can plan ahead when it computes alignments between the source and target sequences, not only for a single time-step but for the next k time-steps as well, by constructing a matrix of proposed future alignments and a commitment vector that governs whether to follow or recompute the plan. This mechanism is inspired by the strategic attentive reader and writer (STRAW) model, a recent neural architecture for planning with hierarchical reinforcement learning that can also learn higher-level temporal abstractions. Our proposed model is end-to-end trainable with differentiable operations. We show that our model outperforms strong baselines on a character-level translation task from WMT’15 with fewer parameters and computes alignments that are qualitatively intuitive.</abstract>
<identifier type="citekey">gulcehre-etal-2017-plan</identifier>
<identifier type="doi">10.18653/v1/W17-2627</identifier>
<location>
<url>https://aclanthology.org/W17-2627</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>228</start>
<end>234</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Plan, Attend, Generate: Character-Level Neural Machine Translation with Planning
%A Gulcehre, Caglar
%A Dutil, Francis
%A Trischler, Adam
%A Bengio, Yoshua
%S Proceedings of the 2nd Workshop on Representation Learning for NLP
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada
%F gulcehre-etal-2017-plan
%X We investigate the integration of a planning mechanism into an encoder-decoder architecture with attention. We develop a model that can plan ahead when it computes alignments between the source and target sequences, not only for a single time-step but for the next k time-steps as well, by constructing a matrix of proposed future alignments and a commitment vector that governs whether to follow or recompute the plan. This mechanism is inspired by the strategic attentive reader and writer (STRAW) model, a recent neural architecture for planning with hierarchical reinforcement learning that can also learn higher-level temporal abstractions. Our proposed model is end-to-end trainable with differentiable operations. We show that our model outperforms strong baselines on a character-level translation task from WMT’15 with fewer parameters and computes alignments that are qualitatively intuitive.
%R 10.18653/v1/W17-2627
%U https://aclanthology.org/W17-2627
%U https://doi.org/10.18653/v1/W17-2627
%P 228-234
Markdown (Informal)
[Plan, Attend, Generate: Character-Level Neural Machine Translation with Planning](https://aclanthology.org/W17-2627) (Gulcehre et al., RepL4NLP 2017)
ACL
Caglar Gulcehre, Francis Dutil, Adam Trischler, and Yoshua Bengio. 2017. Plan, Attend, Generate: Character-Level Neural Machine Translation with Planning. In Proceedings of the 2nd Workshop on Representation Learning for NLP, pages 228–234, Vancouver, Canada. Association for Computational Linguistics.
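
The abstract above describes the planning mechanism only at a high level: a matrix of proposed alignments for the next k time-steps plus a commitment signal that decides whether to follow or recompute the plan. The NumPy sketch below is a minimal, informal illustration of that idea, not the paper's implementation; the class name, parameter shapes, the scalar sigmoid gate, and the row-shifting rule are assumptions made for the example (the paper learns a commitment vector and trains the follow/recompute choice end-to-end with differentiable operations).

```python
import numpy as np


def softmax(x, axis=-1):
    x = x - x.max(axis=axis, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)


class PlanningAttention:
    """Toy attention with a k-step alignment plan and a commitment gate.

    At each decoder step the layer either follows the existing plan
    (shifts the alignment-plan matrix up one row) or recomputes a fresh
    plan from the current decoder state, depending on a sigmoid gate.
    Shapes, the bilinear scoring, and the scalar gate are simplifying
    assumptions for illustration only.
    """

    def __init__(self, src_dim, dec_dim, k=10, seed=0):
        rng = np.random.default_rng(seed)
        self.k = k
        # One bilinear scoring matrix per plan row i: scores source
        # annotations for the alignment proposed for decoder step t + i.
        self.W_plan = rng.normal(scale=0.1, size=(k, dec_dim, src_dim))
        # Parameters of the scalar commitment gate.
        self.w_commit = rng.normal(scale=0.1, size=(dec_dim,))

    def init_state(self, src_len):
        plan = np.zeros((self.k, src_len))  # proposed future alignment scores
        commit = 1.0                        # force a recomputation at step 0
        return plan, commit

    def step(self, dec_state, src_annotations, plan, commit):
        """One decoder step.

        dec_state:        (dec_dim,)          current decoder hidden state
        src_annotations:  (src_len, src_dim)  encoder outputs
        """
        if commit > 0.5:
            # Recompute the whole k-step alignment plan.
            plan = np.stack([dec_state @ self.W_plan[i] @ src_annotations.T
                             for i in range(self.k)])
        else:
            # Follow the plan: shift rows up by one, repeat the last row.
            plan = np.vstack([plan[1:], plan[-1:]])

        # The first plan row gives the attention weights for this step.
        alpha = softmax(plan[0])
        context = alpha @ src_annotations  # (src_dim,)

        # Gate deciding whether the *next* step recomputes the plan.
        commit = 1.0 / (1.0 + np.exp(-(dec_state @ self.w_commit)))
        return context, alpha, plan, commit


# Toy usage with random encoder annotations and a random decoder state.
rng = np.random.default_rng(1)
src = rng.normal(size=(7, 32))                      # 7 source positions
attn = PlanningAttention(src_dim=32, dec_dim=16, k=5)
plan, commit = attn.init_state(src_len=7)
dec_state = rng.normal(size=(16,))
context, alpha, plan, commit = attn.step(dec_state, src, plan, commit)
print(alpha.shape, context.shape, plan.shape, commit)
```

In this toy call the first step always recomputes the plan because the initial commitment is set to 1.0; later steps reuse or refresh it according to the gate, which is the follow-or-recompute behavior the abstract attributes to the commitment vector.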