% Workshop paper, ACL Anthology ID W18-3405 (see the url field).
@InProceedings{duttachowdhury-hasanuzzaman-liu:2018:W18-34,
  author    = {Dutta Chowdhury, Koel and Hasanuzzaman, Mohammed and Liu, Qun},
  title     = {Multimodal Neural Machine Translation for Low-resource Language Pairs using Synthetic Data},
  booktitle = {Proceedings of the Workshop on Deep Learning Approaches for Low-Resource {NLP}},
  month     = jul,
  year      = {2018},
  address   = {Melbourne},
  publisher = {Association for Computational Linguistics},
  pages     = {33--42},
  abstract  = {In this paper, we investigate the effectiveness of training a multimodal neural machine translation (MNMT) system with image features for a low-resource language pair, Hindi and English, using synthetic data. A three-way parallel corpus which contains bilingual texts and corresponding images is required to train a MNMT system with image features. However, such a corpus is not available for low resource language pairs. To address this, we developed both a synthetic training dataset and a manually curated development/test dataset for Hindi based on an existing English-image parallel corpus. We used these datasets to build our image description translation system by adopting state-of-the-art MNMT models. Our results show that it is possible to train a MNMT system for low-resource language pairs through the use of synthetic data and that such a system can benefit from image features.},
  url       = {http://www.aclweb.org/anthology/W18-3405}
}

