@inproceedings{jiang-etal-2019-reo,
title = "{REO}-Relevance, Extraness, Omission: A Fine-grained Evaluation for Image Captioning",
author = "Jiang, Ming and
Hu, Junjie and
Huang, Qiuyuan and
Zhang, Lei and
Diesner, Jana and
Gao, Jianfeng",
editor = "Inui, Kentaro and
Jiang, Jing and
Ng, Vincent and
Wan, Xiaojun",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-1156",
doi = "10.18653/v1/D19-1156",
pages = "1475--1480",
abstract = "Popular metrics used for evaluating image captioning systems, such as BLEU and CIDEr, provide a single score to gauge the system{'}s overall effectiveness. This score is often not informative enough to indicate what specific errors are made by a given system. In this study, we present a fine-grained evaluation method REO for automatically measuring the performance of image captioning systems. REO assesses the quality of captions from three perspectives: 1) Relevance to the ground truth, 2) Extraness of the content that is irrelevant to the ground truth, and 3) Omission of the elements in the images and human references. Experiments on three benchmark datasets demonstrate that our method achieves a higher consistency with human judgments and provides more intuitive evaluation results than alternative metrics.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jiang-etal-2019-reo">
<titleInfo>
<title>REO-Relevance, Extraness, Omission: A Fine-grained Evaluation for Image Captioning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ming</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junjie</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qiuyuan</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lei</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jana</namePart>
<namePart type="family">Diesner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jianfeng</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaojun</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Popular metrics used for evaluating image captioning systems, such as BLEU and CIDEr, provide a single score to gauge the system’s overall effectiveness. This score is often not informative enough to indicate what specific errors are made by a given system. In this study, we present a fine-grained evaluation method REO for automatically measuring the performance of image captioning systems. REO assesses the quality of captions from three perspectives: 1) Relevance to the ground truth, 2) Extraness of the content that is irrelevant to the ground truth, and 3) Omission of the elements in the images and human references. Experiments on three benchmark datasets demonstrate that our method achieves a higher consistency with human judgments and provides more intuitive evaluation results than alternative metrics.</abstract>
<identifier type="citekey">jiang-etal-2019-reo</identifier>
<identifier type="doi">10.18653/v1/D19-1156</identifier>
<location>
<url>https://aclanthology.org/D19-1156</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>1475</start>
<end>1480</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T REO-Relevance, Extraness, Omission: A Fine-grained Evaluation for Image Captioning
%A Jiang, Ming
%A Hu, Junjie
%A Huang, Qiuyuan
%A Zhang, Lei
%A Diesner, Jana
%A Gao, Jianfeng
%Y Inui, Kentaro
%Y Jiang, Jing
%Y Ng, Vincent
%Y Wan, Xiaojun
%S Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F jiang-etal-2019-reo
%X Popular metrics used for evaluating image captioning systems, such as BLEU and CIDEr, provide a single score to gauge the system’s overall effectiveness. This score is often not informative enough to indicate what specific errors are made by a given system. In this study, we present a fine-grained evaluation method REO for automatically measuring the performance of image captioning systems. REO assesses the quality of captions from three perspectives: 1) Relevance to the ground truth, 2) Extraness of the content that is irrelevant to the ground truth, and 3) Omission of the elements in the images and human references. Experiments on three benchmark datasets demonstrate that our method achieves a higher consistency with human judgments and provides more intuitive evaluation results than alternative metrics.
%R 10.18653/v1/D19-1156
%U https://aclanthology.org/D19-1156
%U https://doi.org/10.18653/v1/D19-1156
%P 1475-1480
Markdown (Informal)
[REO-Relevance, Extraness, Omission: A Fine-grained Evaluation for Image Captioning](https://aclanthology.org/D19-1156) (Jiang et al., EMNLP-IJCNLP 2019)
ACL
- Ming Jiang, Junjie Hu, Qiuyuan Huang, Lei Zhang, Jana Diesner, and Jianfeng Gao. 2019. REO-Relevance, Extraness, Omission: A Fine-grained Evaluation for Image Captioning. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pages 1475–1480, Hong Kong, China. Association for Computational Linguistics.