@inproceedings{tater-etal-2024-evaluating,
    title     = {Evaluating Semantic Relations in Predicting Textual Labels for Images of Abstract and Concrete Concepts},
    author    = {Tater, Tarun and
                 Schulte Im Walde, Sabine and
                 Frassinelli, Diego},
    editor    = {Kuribayashi, Tatsuki and
                 Rambelli, Giulia and
                 Takmaz, Ece and
                 Wicke, Philipp and
                 Oseki, Yohei},
    booktitle = {Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics},
    month     = aug,
    year      = {2024},
    address   = {Bangkok, Thailand},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.cmcl-1.18},
    doi       = {10.18653/v1/2024.cmcl-1.18},
    pages     = {214--220},
    abstract  = {This study investigates the performance of SigLIP, a state-of-the-art Vision-Language Model (VLM), in predicting labels for images depicting 1,278 concepts. Our analysis across 300 images per concept shows that the model frequently predicts the exact user-tagged labels, but similarly, it often predicts labels that are semantically related to the exact labels in various ways: synonyms, hypernyms, co-hyponyms, and associated words, particularly for abstract concepts. We then zoom into the diversity of the user tags of images and word associations for abstract versus concrete concepts. Surprisingly, not only abstract but also concrete concepts exhibit significant variability, thus challenging the traditional view that representations of concrete concepts are less diverse.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tater-etal-2024-evaluating">
<titleInfo>
<title>Evaluating Semantic Relations in Predicting Textual Labels for Images of Abstract and Concrete Concepts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tarun</namePart>
<namePart type="family">Tater</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sabine</namePart>
<namePart type="family">Schulte Im Walde</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diego</namePart>
<namePart type="family">Frassinelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tatsuki</namePart>
<namePart type="family">Kuribayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giulia</namePart>
<namePart type="family">Rambelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ece</namePart>
<namePart type="family">Takmaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Wicke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yohei</namePart>
<namePart type="family">Oseki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This study investigates the performance of SigLIP, a state-of-the-art Vision-Language Model (VLM), in predicting labels for images depicting 1,278 concepts. Our analysis across 300 images per concept shows that the model frequently predicts the exact user-tagged labels, but similarly, it often predicts labels that are semantically related to the exact labels in various ways: synonyms, hypernyms, co-hyponyms, and associated words, particularly for abstract concepts. We then zoom into the diversity of the user tags of images and word associations for abstract versus concrete concepts. Surprisingly, not only abstract but also concrete concepts exhibit significant variability, thus challenging the traditional view that representations of concrete concepts are less diverse.</abstract>
<identifier type="citekey">tater-etal-2024-evaluating</identifier>
<identifier type="doi">10.18653/v1/2024.cmcl-1.18</identifier>
<location>
<url>https://aclanthology.org/2024.cmcl-1.18</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>214</start>
<end>220</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Semantic Relations in Predicting Textual Labels for Images of Abstract and Concrete Concepts
%A Tater, Tarun
%A Schulte Im Walde, Sabine
%A Frassinelli, Diego
%Y Kuribayashi, Tatsuki
%Y Rambelli, Giulia
%Y Takmaz, Ece
%Y Wicke, Philipp
%Y Oseki, Yohei
%S Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F tater-etal-2024-evaluating
%X This study investigates the performance of SigLIP, a state-of-the-art Vision-Language Model (VLM), in predicting labels for images depicting 1,278 concepts. Our analysis across 300 images per concept shows that the model frequently predicts the exact user-tagged labels, but similarly, it often predicts labels that are semantically related to the exact labels in various ways: synonyms, hypernyms, co-hyponyms, and associated words, particularly for abstract concepts. We then zoom into the diversity of the user tags of images and word associations for abstract versus concrete concepts. Surprisingly, not only abstract but also concrete concepts exhibit significant variability, thus challenging the traditional view that representations of concrete concepts are less diverse.
%R 10.18653/v1/2024.cmcl-1.18
%U https://aclanthology.org/2024.cmcl-1.18
%U https://doi.org/10.18653/v1/2024.cmcl-1.18
%P 214-220
Markdown (Informal)
[Evaluating Semantic Relations in Predicting Textual Labels for Images of Abstract and Concrete Concepts](https://aclanthology.org/2024.cmcl-1.18) (Tater et al., CMCL-WS 2024)
ACL