% BibTeX record for a workshop paper (citekey: li-etal-2024-assessing).
% "author" lists the five paper authors; "editor" lists the six editors of the
% proceedings volume named in "booktitle". Names are "Family, Given" joined by "and".
% The same citation follows below in MODS XML and in refer/EndNote tagged form.
@inproceedings{li-etal-2024-assessing,
title = "Assessing Image-Captioning Models: A Novel Framework Integrating Statistical Analysis and Metric Patterns",
author = "Li, Qiaomu and
Xie, Ying and
Grundlingh, Nina and
Chawan, Varsha Rani and
Wang, Cody",
editor = "Malmasi, Shervin and
Fetahu, Besnik and
Ueffing, Nicola and
Rokhlenko, Oleg and
Agichtein, Eugene and
Guy, Ido",
booktitle = "Proceedings of the Seventh Workshop on e-Commerce and NLP @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.ecnlp-1.9",
pages = "79--87",
abstract = "In this study, we present a novel evaluation framework for image-captioning models that integrate statistical analysis with common evaluation metrics, utilizing two popular datasets, FashionGen and Amazon, with contrasting dataset variation to evaluate four models: Video-LLaVa, BLIP, CoCa and ViT-GPT2. Our approach not only reveals the comparative strengths of models, offering insights into their adaptability and applicability in real-world scenarios but also contributes to the field by providing a comprehensive evaluation method that considers both statistical significance and practical relevance to guide the selection of models for specific applications. Specifically, we propose Rank Score as a new evaluation metric that is designed for e-commerce image search applications and employ CLIP Score to quantify dataset variation to offer a holistic view of model performance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey li-etal-2024-assessing.
     Top-level <name> elements are the paper's authors; the <relatedItem type="host">
     element describes the proceedings volume and carries its editors, publisher and place.
     Text-bearing leaf elements are kept on one line so indentation never becomes data. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="li-etal-2024-assessing">
    <titleInfo>
      <title>Assessing Image-Captioning Models: A Novel Framework Integrating Statistical Analysis and Metric Patterns</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Qiaomu</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ying</namePart>
      <namePart type="family">Xie</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nina</namePart>
      <namePart type="family">Grundlingh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Varsha</namePart>
      <namePart type="given">Rani</namePart>
      <namePart type="family">Chawan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Cody</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Seventh Workshop on e-Commerce and NLP @ LREC-COLING 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Shervin</namePart>
        <namePart type="family">Malmasi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Besnik</namePart>
        <namePart type="family">Fetahu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nicola</namePart>
        <namePart type="family">Ueffing</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Oleg</namePart>
        <namePart type="family">Rokhlenko</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eugene</namePart>
        <namePart type="family">Agichtein</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ido</namePart>
        <namePart type="family">Guy</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this study, we present a novel evaluation framework for image-captioning models that integrate statistical analysis with common evaluation metrics, utilizing two popular datasets, FashionGen and Amazon, with contrasting dataset variation to evaluate four models: Video-LLaVa, BLIP, CoCa and ViT-GPT2. Our approach not only reveals the comparative strengths of models, offering insights into their adaptability and applicability in real-world scenarios but also contributes to the field by providing a comprehensive evaluation method that considers both statistical significance and practical relevance to guide the selection of models for specific applications. Specifically, we propose Rank Score as a new evaluation metric that is designed for e-commerce image search applications and employ CLIP Score to quantify dataset variation to offer a holistic view of model performance.</abstract>
    <identifier type="citekey">li-etal-2024-assessing</identifier>
    <location>
      <url>https://aclanthology.org/2024.ecnlp-1.9</url>
    </location>
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>79</start>
        <end>87</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Assessing Image-Captioning Models: A Novel Framework Integrating Statistical Analysis and Metric Patterns
%A Li, Qiaomu
%A Xie, Ying
%A Grundlingh, Nina
%A Chawan, Varsha Rani
%A Wang, Cody
%Y Malmasi, Shervin
%Y Fetahu, Besnik
%Y Ueffing, Nicola
%Y Rokhlenko, Oleg
%Y Agichtein, Eugene
%Y Guy, Ido
%S Proceedings of the Seventh Workshop on e-Commerce and NLP @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F li-etal-2024-assessing
%X In this study, we present a novel evaluation framework for image-captioning models that integrate statistical analysis with common evaluation metrics, utilizing two popular datasets, FashionGen and Amazon, with contrasting dataset variation to evaluate four models: Video-LLaVa, BLIP, CoCa and ViT-GPT2. Our approach not only reveals the comparative strengths of models, offering insights into their adaptability and applicability in real-world scenarios but also contributes to the field by providing a comprehensive evaluation method that considers both statistical significance and practical relevance to guide the selection of models for specific applications. Specifically, we propose Rank Score as a new evaluation metric that is designed for e-commerce image search applications and employ CLIP Score to quantify dataset variation to offer a holistic view of model performance.
%U https://aclanthology.org/2024.ecnlp-1.9
%P 79-87
Markdown (Informal)
[Assessing Image-Captioning Models: A Novel Framework Integrating Statistical Analysis and Metric Patterns](https://aclanthology.org/2024.ecnlp-1.9) (Li et al., ECNLP-WS 2024)
ACL