% Source record: ACL Anthology W18-3405.
@inproceedings{dutta-chowdhury-etal-2018-multimodal,
  title     = {Multimodal Neural Machine Translation for Low-resource Language Pairs using Synthetic Data},
  author    = {Dutta Chowdhury, Koel and Hasanuzzaman, Mohammed and Liu, Qun},
  editor    = {Haffari, Reza and Cherry, Colin and Foster, George and Khadivi, Shahram and Salehi, Bahar},
  booktitle = {Proceedings of the Workshop on Deep Learning Approaches for Low-Resource {NLP}},
  month     = jul,
  year      = {2018},
  address   = {Melbourne},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W18-3405/},
  doi       = {10.18653/v1/W18-3405},
  pages     = {33--42},
  abstract  = {In this paper, we investigate the effectiveness of training a multimodal neural machine translation (MNMT) system with image features for a low-resource language pair, Hindi and English, using synthetic data. A three-way parallel corpus which contains bilingual texts and corresponding images is required to train a MNMT system with image features. However, such a corpus is not available for low resource language pairs. To address this, we developed both a synthetic training dataset and a manually curated development/test dataset for Hindi based on an existing English-image parallel corpus. We used these datasets to build our image description translation system by adopting state-of-the-art MNMT models. Our results show that it is possible to train a MNMT system for low-resource language pairs through the use of synthetic data and that such a system can benefit from image features.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- One MODS record for the paper; the host relatedItem below describes the proceedings volume. -->
  <mods ID="dutta-chowdhury-etal-2018-multimodal">
    <titleInfo>
      <title>Multimodal Neural Machine Translation for Low-resource Language Pairs using Synthetic Data</title>
    </titleInfo>
    <!-- Paper authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Koel</namePart>
      <namePart type="family">Dutta Chowdhury</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mohammed</namePart>
      <namePart type="family">Hasanuzzaman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Qun</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2018-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: proceedings title, editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Workshop on Deep Learning Approaches for Low-Resource NLP</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Reza</namePart>
        <namePart type="family">Haffari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Colin</namePart>
        <namePart type="family">Cherry</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">George</namePart>
        <namePart type="family">Foster</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shahram</namePart>
        <namePart type="family">Khadivi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bahar</namePart>
        <namePart type="family">Salehi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Melbourne</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we investigate the effectiveness of training a multimodal neural machine translation (MNMT) system with image features for a low-resource language pair, Hindi and English, using synthetic data. A three-way parallel corpus which contains bilingual texts and corresponding images is required to train a MNMT system with image features. However, such a corpus is not available for low resource language pairs. To address this, we developed both a synthetic training dataset and a manually curated development/test dataset for Hindi based on an existing English-image parallel corpus. We used these datasets to build our image description translation system by adopting state-of-the-art MNMT models. Our results show that it is possible to train a MNMT system for low-resource language pairs through the use of synthetic data and that such a system can benefit from image features.</abstract>
    <!-- Identifiers and landing page. -->
    <identifier type="citekey">dutta-chowdhury-etal-2018-multimodal</identifier>
    <identifier type="doi">10.18653/v1/W18-3405</identifier>
    <location>
      <url>https://aclanthology.org/W18-3405/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2018-07</date>
      <extent unit="page">
        <start>33</start>
        <end>42</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Multimodal Neural Machine Translation for Low-resource Language Pairs using Synthetic Data
%A Dutta Chowdhury, Koel
%A Hasanuzzaman, Mohammed
%A Liu, Qun
%Y Haffari, Reza
%Y Cherry, Colin
%Y Foster, George
%Y Khadivi, Shahram
%Y Salehi, Bahar
%S Proceedings of the Workshop on Deep Learning Approaches for Low-Resource NLP
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne
%F dutta-chowdhury-etal-2018-multimodal
%X In this paper, we investigate the effectiveness of training a multimodal neural machine translation (MNMT) system with image features for a low-resource language pair, Hindi and English, using synthetic data. A three-way parallel corpus which contains bilingual texts and corresponding images is required to train a MNMT system with image features. However, such a corpus is not available for low resource language pairs. To address this, we developed both a synthetic training dataset and a manually curated development/test dataset for Hindi based on an existing English-image parallel corpus. We used these datasets to build our image description translation system by adopting state-of-the-art MNMT models. Our results show that it is possible to train a MNMT system for low-resource language pairs through the use of synthetic data and that such a system can benefit from image features.
%R 10.18653/v1/W18-3405
%U https://aclanthology.org/W18-3405/
%U https://doi.org/10.18653/v1/W18-3405
%P 33-42
Markdown (Informal)
[Multimodal Neural Machine Translation for Low-resource Language Pairs using Synthetic Data](https://aclanthology.org/W18-3405/) (Dutta Chowdhury et al., ACL 2018)
ACL