@inproceedings{calixto-etal-2017-human,
title = "Human Evaluation of Multi-modal Neural Machine Translation: A Case-Study on {E}-Commerce Listing Titles",
author = "Calixto, Iacer and
Stein, Daniel and
Matusov, Evgeny and
Castilho, Sheila and
Way, Andy",
editor = "Belz, Anya and
Erdem, Erkut and
Pastra, Katerina and
Mikolajczyk, Krystian",
booktitle = "Proceedings of the Sixth Workshop on Vision and Language",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-2004/",
doi = "10.18653/v1/W17-2004",
pages = "31--37",
abstract = "In this paper, we study how humans perceive the use of images as an additional knowledge source to machine-translate user-generated product listings in an e-commerce company. We conduct a human evaluation where we assess how a multi-modal neural machine translation (NMT) model compares to two text-only approaches: a conventional state-of-the-art attention-based NMT and a phrase-based statistical machine translation (PBSMT) model. We evaluate translations obtained with different systems and also discuss the data set of user-generated product listings, which in our case comprises both product listings and associated images. We found that humans preferred translations obtained with a PBSMT system to both text-only and multi-modal NMT over 56{\%} of the time. Nonetheless, human evaluators ranked translations from a multi-modal NMT model as better than those of a text-only NMT over 88{\%} of the time, which suggests that images do help NMT in this use-case."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="calixto-etal-2017-human">
<titleInfo>
<title>Human Evaluation of Multi-modal Neural Machine Translation: A Case-Study on E-Commerce Listing Titles</title>
</titleInfo>
<name type="personal">
<namePart type="given">Iacer</namePart>
<namePart type="family">Calixto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Stein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Evgeny</namePart>
<namePart type="family">Matusov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sheila</namePart>
<namePart type="family">Castilho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andy</namePart>
<namePart type="family">Way</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Workshop on Vision and Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anya</namePart>
<namePart type="family">Belz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erkut</namePart>
<namePart type="family">Erdem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katerina</namePart>
<namePart type="family">Pastra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Krystian</namePart>
<namePart type="family">Mikolajczyk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we study how humans perceive the use of images as an additional knowledge source to machine-translate user-generated product listings in an e-commerce company. We conduct a human evaluation where we assess how a multi-modal neural machine translation (NMT) model compares to two text-only approaches: a conventional state-of-the-art attention-based NMT and a phrase-based statistical machine translation (PBSMT) model. We evaluate translations obtained with different systems and also discuss the data set of user-generated product listings, which in our case comprises both product listings and associated images. We found that humans preferred translations obtained with a PBSMT system to both text-only and multi-modal NMT over 56% of the time. Nonetheless, human evaluators ranked translations from a multi-modal NMT model as better than those of a text-only NMT over 88% of the time, which suggests that images do help NMT in this use-case.</abstract>
<identifier type="citekey">calixto-etal-2017-human</identifier>
<identifier type="doi">10.18653/v1/W17-2004</identifier>
<location>
<url>https://aclanthology.org/W17-2004/</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>31</start>
<end>37</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Human Evaluation of Multi-modal Neural Machine Translation: A Case-Study on E-Commerce Listing Titles
%A Calixto, Iacer
%A Stein, Daniel
%A Matusov, Evgeny
%A Castilho, Sheila
%A Way, Andy
%Y Belz, Anya
%Y Erdem, Erkut
%Y Pastra, Katerina
%Y Mikolajczyk, Krystian
%S Proceedings of the Sixth Workshop on Vision and Language
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F calixto-etal-2017-human
%X In this paper, we study how humans perceive the use of images as an additional knowledge source to machine-translate user-generated product listings in an e-commerce company. We conduct a human evaluation where we assess how a multi-modal neural machine translation (NMT) model compares to two text-only approaches: a conventional state-of-the-art attention-based NMT and a phrase-based statistical machine translation (PBSMT) model. We evaluate translations obtained with different systems and also discuss the data set of user-generated product listings, which in our case comprises both product listings and associated images. We found that humans preferred translations obtained with a PBSMT system to both text-only and multi-modal NMT over 56% of the time. Nonetheless, human evaluators ranked translations from a multi-modal NMT model as better than those of a text-only NMT over 88% of the time, which suggests that images do help NMT in this use-case.
%R 10.18653/v1/W17-2004
%U https://aclanthology.org/W17-2004/
%U https://doi.org/10.18653/v1/W17-2004
%P 31-37
Markdown (Informal)
[Human Evaluation of Multi-modal Neural Machine Translation: A Case-Study on E-Commerce Listing Titles](https://aclanthology.org/W17-2004/) (Calixto et al., VL 2017)
ACL