BibTeX
@inproceedings{elliott-kadar-2017-imagination,
title = "Imagination Improves Multimodal Translation",
author = "Elliott, Desmond and
K{\'a}d{\'a}r, {\'A}kos",
editor = "Kondrak, Greg and
Watanabe, Taro",
booktitle = "Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
month = nov,
year = "2017",
address = "Taipei, Taiwan",
publisher = "Asian Federation of Natural Language Processing",
url = "https://aclanthology.org/I17-1014",
pages = "130--141",
abstract = "We decompose multimodal translation into two sub-tasks: learning to translate and learning visually grounded representations. In a multitask learning framework, translations are learned in an attention-based encoder-decoder, and grounded representations are learned through image representation prediction. Our approach improves translation performance compared to the state of the art on the Multi30K dataset. Furthermore, it is equally effective if we train the image prediction task on the external MS COCO dataset, and we find improvements if we train the translation model on the external News Commentary parallel text.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="elliott-kadar-2017-imagination">
<titleInfo>
<title>Imagination Improves Multimodal Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Desmond</namePart>
<namePart type="family">Elliott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ákos</namePart>
<namePart type="family">Kádár</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Kondrak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taro</namePart>
<namePart type="family">Watanabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Asian Federation of Natural Language Processing</publisher>
<place>
<placeTerm type="text">Taipei, Taiwan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We decompose multimodal translation into two sub-tasks: learning to translate and learning visually grounded representations. In a multitask learning framework, translations are learned in an attention-based encoder-decoder, and grounded representations are learned through image representation prediction. Our approach improves translation performance compared to the state of the art on the Multi30K dataset. Furthermore, it is equally effective if we train the image prediction task on the external MS COCO dataset, and we find improvements if we train the translation model on the external News Commentary parallel text.</abstract>
<identifier type="citekey">elliott-kadar-2017-imagination</identifier>
<location>
<url>https://aclanthology.org/I17-1014</url>
</location>
<part>
<date>2017-11</date>
<extent unit="page">
<start>130</start>
<end>141</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Imagination Improves Multimodal Translation
%A Elliott, Desmond
%A Kádár, Ákos
%Y Kondrak, Greg
%Y Watanabe, Taro
%S Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)
%D 2017
%8 November
%I Asian Federation of Natural Language Processing
%C Taipei, Taiwan
%F elliott-kadar-2017-imagination
%X We decompose multimodal translation into two sub-tasks: learning to translate and learning visually grounded representations. In a multitask learning framework, translations are learned in an attention-based encoder-decoder, and grounded representations are learned through image representation prediction. Our approach improves translation performance compared to the state of the art on the Multi30K dataset. Furthermore, it is equally effective if we train the image prediction task on the external MS COCO dataset, and we find improvements if we train the translation model on the external News Commentary parallel text.
%U https://aclanthology.org/I17-1014
%P 130-141
Markdown (Informal)
[Imagination Improves Multimodal Translation](https://aclanthology.org/I17-1014) (Elliott & Kádár, IJCNLP 2017)
ACL
Desmond Elliott and Ákos Kádár. 2017. Imagination Improves Multimodal Translation. In Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pages 130–141, Taipei, Taiwan. Asian Federation of Natural Language Processing.
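Sketch (Informal)
The abstract describes a multitask setup: a shared source encoder serves both an attention-based translation decoder and an auxiliary "imagination" task that predicts the image's feature vector. Below is a minimal, hypothetical PyTorch sketch of that decomposition. It is not the authors' code: the class and function names are illustrative, the translation decoder is elided, and the contrastive max-margin form of the grounding loss is an assumption about the image-prediction objective.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class ImaginationModel(nn.Module):
    """Shared bidirectional GRU encoder feeding two tasks: translation
    (the attention-based decoder is elided here) and prediction of a
    visual feature vector from the pooled source states."""
    def __init__(self, vocab_size, emb_dim=128, hid_dim=256, img_dim=2048):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, emb_dim)
        self.encoder = nn.GRU(emb_dim, hid_dim, batch_first=True,
                              bidirectional=True)
        self.img_head = nn.Linear(2 * hid_dim, img_dim)

    def forward(self, src):                        # src: (batch, src_len) token ids
        states, _ = self.encoder(self.embed(src))  # (batch, src_len, 2*hid_dim)
        pooled = states.mean(dim=1)                # average-pool encoder states
        return F.normalize(self.img_head(pooled), dim=-1)

def image_margin_loss(pred, img_feats, margin=0.1):
    """Contrastive max-margin loss (an assumed grounding objective): each
    predicted vector should be closer to its own image feature than to the
    other images in the batch."""
    img_feats = F.normalize(img_feats, dim=-1)
    scores = pred @ img_feats.t()                  # cosine similarity matrix
    pos = scores.diag().unsqueeze(1)               # similarity to the true image
    cost = (margin - pos + scores).clamp(min=0)    # hinge against all others
    mask = torch.eye(scores.size(0), dtype=torch.bool, device=scores.device)
    return cost.masked_fill(mask, 0.0).mean()      # drop the positive pairs

# Joint objective: interpolate the translation cross-entropy with the
# grounding loss, J = alpha * J_mt + (1 - alpha) * J_img.
model = ImaginationModel(vocab_size=1000)
src = torch.randint(0, 1000, (4, 12))              # toy batch of source sentences
img = torch.randn(4, 2048)                         # e.g. CNN image features
alpha = 0.5
loss_mt = torch.tensor(0.0)                        # stand-in for the decoder loss
loss = alpha * loss_mt + (1 - alpha) * image_margin_loss(model(src), img)
loss.backward()
```

Because both losses backpropagate into the same encoder, the grounding gradients shape the very representations the translation decoder attends over, which is the mechanism the abstract credits for the improvement on Multi30K.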