% EMNLP 2017 conference paper by Delbrouck and Dupont.
% The abstract is stored as flowing text: line-end hyphenation from the
% original column layout is removed and the arrow is written in LaTeX
% math so the entry stays 7-bit clean for classic BibTeX.
@InProceedings{delbrouck-dupont:2017:EMNLP2017,
  author    = {Delbrouck, Jean-Benoit and Dupont, St{\'e}phane},
  title     = {An empirical study on the effectiveness of images in {Multimodal} {Neural} {Machine} {Translation}},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {910--919},
  abstract  = {In state-of-the-art Neural Machine Translation (NMT), an
               attention mechanism is used during decoding to enhance the
               translation. At every step, the decoder uses this mechanism
               to focus on different parts of the source sentence to gather
               the most useful information before outputting its target
               word. Recently, the effectiveness of the attention mechanism
               has also been explored for multi-modal tasks, where it
               becomes possible to focus both on sentence parts and image
               regions that they describe. In this paper, we compare
               several attention mechanism on the multi-modal translation
               task (English, image $\rightarrow$ German) and evaluate the
               ability of the model to make use of images to improve
               translation. We surpass state-of-the-art scores on the
               Multi30k data set, we nevertheless identify and report
               different misbehavior of the machine while translating.},
  url       = {https://www.aclweb.org/anthology/D17-1095}
}

