@inproceedings{kilickaya-etal-2017-evaluating,
title = "Re-evaluating Automatic Metrics for Image Captioning",
author = "Kilickaya, Mert and
Erdem, Aykut and
Ikizler-Cinbis, Nazli and
Erdem, Erkut",
editor = "Lapata, Mirella and
Blunsom, Phil and
Koller, Alexander",
booktitle = "Proceedings of the 15th Conference of the {E}uropean Chapter of the Association for Computational Linguistics: Volume 1, Long Papers",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/E17-1019",
pages = "199--209",
abstract = "The task of generating natural language descriptions from images has received a lot of attention in recent years. Consequently, it is becoming increasingly important to evaluate such image captioning approaches in an automatic manner. In this paper, we provide an in-depth evaluation of the existing image captioning metrics through a series of carefully designed experiments. Moreover, we explore the utilization of the recently proposed Word Mover{'}s Distance (WMD) document metric for the purpose of image captioning. Our findings outline the differences and/or similarities between metrics and their relative robustness by means of extensive correlation, accuracy and distraction based evaluations. Our results also demonstrate that WMD provides strong advantages over other metrics.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kilickaya-etal-2017-evaluating">
<titleInfo>
<title>Re-evaluating Automatic Metrics for Image Captioning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mert</namePart>
<namePart type="family">Kilickaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aykut</namePart>
<namePart type="family">Erdem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nazli</namePart>
<namePart type="family">Ikizler-Cinbis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erkut</namePart>
<namePart type="family">Erdem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mirella</namePart>
<namePart type="family">Lapata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Phil</namePart>
<namePart type="family">Blunsom</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Koller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The task of generating natural language descriptions from images has received a lot of attention in recent years. Consequently, it is becoming increasingly important to evaluate such image captioning approaches in an automatic manner. In this paper, we provide an in-depth evaluation of the existing image captioning metrics through a series of carefully designed experiments. Moreover, we explore the utilization of the recently proposed Word Mover’s Distance (WMD) document metric for the purpose of image captioning. Our findings outline the differences and/or similarities between metrics and their relative robustness by means of extensive correlation, accuracy and distraction based evaluations. Our results also demonstrate that WMD provides strong advantages over other metrics.</abstract>
<identifier type="citekey">kilickaya-etal-2017-evaluating</identifier>
<location>
<url>https://aclanthology.org/E17-1019</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>199</start>
<end>209</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Re-evaluating Automatic Metrics for Image Captioning
%A Kilickaya, Mert
%A Erdem, Aykut
%A Ikizler-Cinbis, Nazli
%A Erdem, Erkut
%Y Lapata, Mirella
%Y Blunsom, Phil
%Y Koller, Alexander
%S Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F kilickaya-etal-2017-evaluating
%X The task of generating natural language descriptions from images has received a lot of attention in recent years. Consequently, it is becoming increasingly important to evaluate such image captioning approaches in an automatic manner. In this paper, we provide an in-depth evaluation of the existing image captioning metrics through a series of carefully designed experiments. Moreover, we explore the utilization of the recently proposed Word Mover’s Distance (WMD) document metric for the purpose of image captioning. Our findings outline the differences and/or similarities between metrics and their relative robustness by means of extensive correlation, accuracy and distraction based evaluations. Our results also demonstrate that WMD provides strong advantages over other metrics.
%U https://aclanthology.org/E17-1019
%P 199-209
Markdown (Informal)
[Re-evaluating Automatic Metrics for Image Captioning](https://aclanthology.org/E17-1019) (Kilickaya et al., EACL 2017)
ACL
- Mert Kilickaya, Aykut Erdem, Nazli Ikizler-Cinbis, and Erkut Erdem. 2017. Re-evaluating Automatic Metrics for Image Captioning. In Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers, pages 199–209, Valencia, Spain. Association for Computational Linguistics.