@InProceedings{calixto-EtAl:2017:VL17,
  author    = {Calixto, Iacer  and  Stein, Daniel  and  Matusov, Evgeny  and  Castilho, Sheila  and  Way, Andy},
  title     = {Human Evaluation of Multi-modal Neural Machine Translation: A Case-Study on E-Commerce Listing Titles},
  booktitle = {Proceedings of the Sixth Workshop on Vision and Language},
  month     = {April},
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {31--37},
  abstract  = {In this paper, we study how humans perceive the use of images as an additional
               knowledge source to machine-translate user-generated product listings in an
               e-commerce company. We conduct a human evaluation where we assess how a
               multi-modal neural machine translation (NMT) model compares to two text-only
               approaches: a conventional state-of-the-art attention-based NMT and a
               phrase-based statistical machine translation (PBSMT) model. We evaluate
               translations obtained with the different systems and also discuss the data set
               of user-generated product listings, which in our case comprises both product
               listings and associated images. We found that humans preferred translations
               obtained with the PBSMT system to both text-only and multi-modal NMT over 56%
               of the time. Nonetheless, human evaluators ranked translations from the
               multi-modal NMT model as better than those of the text-only NMT over 88% of
               the time, which suggests that images do help NMT in this use case.},
  url       = {http://www.aclweb.org/anthology/W17-2004}
}