@inproceedings{kershaw-bober-2019-lexical,
title = "The Lexical Gap: An Improved Measure of Automated Image Description Quality",
author = "Kershaw, Austin and
Bober, Miroslaw",
editor = "Dobnik, Simon and
Chatzikyriakidis, Stergios and
Demberg, Vera and
Abu Kwaik, Kathrein and
Maraev, Vladislav",
booktitle = "Proceedings of the 13th International Conference on Computational Semantics - Student Papers",
month = may,
year = "2019",
address = "Gothenburg, Sweden",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-0603/",
doi = "10.18653/v1/W19-0603",
pages = "15--23",
abstract = "The challenge of automatically describing images and videos has stimulated much research in Computer Vision and Natural Language Processing. In order to test the semantic abilities of new algorithms, we need reliable and objective ways of measuring progress. We show that standard evaluation measures do not take into account the semantic richness of a description, and give the impression that sparse machine descriptions outperform rich human descriptions. We introduce and test a new measure of semantic ability based on relative lexical diversity. We show how our measure can work alongside existing measures to achieve state of the art correlation with human judgement of quality. We also introduce a new dataset: Rich-Sparse Descriptions, which provides 2K human and machine descriptions to stimulate interest into the semantic evaluation of machine descriptions."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kershaw-bober-2019-lexical">
<titleInfo>
<title>The Lexical Gap: An Improved Measure of Automated Image Description Quality</title>
</titleInfo>
<name type="personal">
<namePart type="given">Austin</namePart>
<namePart type="family">Kershaw</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miroslaw</namePart>
<namePart type="family">Bober</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th International Conference on Computational Semantics - Student Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Dobnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stergios</namePart>
<namePart type="family">Chatzikyriakidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kathrein</namePart>
<namePart type="family">Abu Kwaik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vladislav</namePart>
<namePart type="family">Maraev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gothenburg, Sweden</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The challenge of automatically describing images and videos has stimulated much research in Computer Vision and Natural Language Processing. In order to test the semantic abilities of new algorithms, we need reliable and objective ways of measuring progress. We show that standard evaluation measures do not take into account the semantic richness of a description, and give the impression that sparse machine descriptions outperform rich human descriptions. We introduce and test a new measure of semantic ability based on relative lexical diversity. We show how our measure can work alongside existing measures to achieve state of the art correlation with human judgement of quality. We also introduce a new dataset: Rich-Sparse Descriptions, which provides 2K human and machine descriptions to stimulate interest into the semantic evaluation of machine descriptions.</abstract>
<identifier type="citekey">kershaw-bober-2019-lexical</identifier>
<identifier type="doi">10.18653/v1/W19-0603</identifier>
<location>
<url>https://aclanthology.org/W19-0603/</url>
</location>
<part>
<date>2019-05</date>
<extent unit="page">
<start>15</start>
<end>23</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Lexical Gap: An Improved Measure of Automated Image Description Quality
%A Kershaw, Austin
%A Bober, Miroslaw
%Y Dobnik, Simon
%Y Chatzikyriakidis, Stergios
%Y Demberg, Vera
%Y Abu Kwaik, Kathrein
%Y Maraev, Vladislav
%S Proceedings of the 13th International Conference on Computational Semantics - Student Papers
%D 2019
%8 May
%I Association for Computational Linguistics
%C Gothenburg, Sweden
%F kershaw-bober-2019-lexical
%X The challenge of automatically describing images and videos has stimulated much research in Computer Vision and Natural Language Processing. In order to test the semantic abilities of new algorithms, we need reliable and objective ways of measuring progress. We show that standard evaluation measures do not take into account the semantic richness of a description, and give the impression that sparse machine descriptions outperform rich human descriptions. We introduce and test a new measure of semantic ability based on relative lexical diversity. We show how our measure can work alongside existing measures to achieve state of the art correlation with human judgement of quality. We also introduce a new dataset: Rich-Sparse Descriptions, which provides 2K human and machine descriptions to stimulate interest into the semantic evaluation of machine descriptions.
%R 10.18653/v1/W19-0603
%U https://aclanthology.org/W19-0603/
%U https://doi.org/10.18653/v1/W19-0603
%P 15-23
Markdown (Informal)
[The Lexical Gap: An Improved Measure of Automated Image Description Quality](https://aclanthology.org/W19-0603/) (Kershaw & Bober, IWCS 2019)
ACL