@inproceedings{yoshida-etal-2025-visual,
title = "Visual Priming Effect on Large-scale Vision Language Models",
author = "Yoshida, Daiki and
Sakajo, Haruki and
Hayashi, Kazuki and
Sakai, Yusuke and
Kamigaito, Hidetaka and
Hayashi, Katsuhiko and
Watanabe, Taro",
editor = "Angelova, Galia and
Kunilovskaya, Maria and
Escribe, Marie and
Mitkov, Ruslan",
booktitle = "Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.ranlp-1.160/",
pages = "1385--1395",
abstract = "Large-scale Vision-Language Models (LVLMs) integrate linguistic and visual information, demonstrating advanced task-solving capabilities. These models are originally derived from Large Language Models, leading to strong capabilities for language tasks. However, the impact of additional visual information on model responses remains insufficiently understood. In this study, we focus on the priming effect, a psychological phenomenon, to investigate how visual information influences language task processing. We present additional intentionally designed images alongside two types of language tasks with different characteristics and analyze changes in the model{'}s responses. Our experimental results show that model responses shift in the direction intended by the image, suggesting that LVLMs do not simply ignore visual information but actively incorporate it into language processing. Furthermore, the similarity between this behavior and priming effects observed in human cognition suggests that LVLMs may share certain aspects of human cognitive mechanisms."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yoshida-etal-2025-visual">
<titleInfo>
<title>Visual Priming Effect on Large-scale Vision Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daiki</namePart>
<namePart type="family">Yoshida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haruki</namePart>
<namePart type="family">Sakajo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kazuki</namePart>
<namePart type="family">Hayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Sakai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hidetaka</namePart>
<namePart type="family">Kamigaito</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katsuhiko</namePart>
<namePart type="family">Hayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taro</namePart>
<namePart type="family">Watanabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
</titleInfo>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Kunilovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Escribe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large-scale Vision-Language Models (LVLMs) integrate linguistic and visual information, demonstrating advanced task-solving capabilities. These models are originally derived from Large Language Models, leading to strong capabilities for language tasks. However, the impact of additional visual information on model responses remains insufficiently understood. In this study, we focus on the priming effect, a psychological phenomenon, to investigate how visual information influences language task processing. We present additional intentionally designed images alongside two types of language tasks with different characteristics and analyze changes in the model’s responses. Our experimental results show that model responses shift in the direction intended by the image, suggesting that LVLMs do not simply ignore visual information but actively incorporate it into language processing. Furthermore, the similarity between this behavior and priming effects observed in human cognition suggests that LVLMs may share certain aspects of human cognitive mechanisms.</abstract>
<identifier type="citekey">yoshida-etal-2025-visual</identifier>
<location>
<url>https://aclanthology.org/2025.ranlp-1.160/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>1385</start>
<end>1395</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Visual Priming Effect on Large-scale Vision Language Models
%A Yoshida, Daiki
%A Sakajo, Haruki
%A Hayashi, Kazuki
%A Sakai, Yusuke
%A Kamigaito, Hidetaka
%A Hayashi, Katsuhiko
%A Watanabe, Taro
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F yoshida-etal-2025-visual
%X Large-scale Vision-Language Models (LVLMs) integrate linguistic and visual information, demonstrating advanced task-solving capabilities. These models are originally derived from Large Language Models, leading to strong capabilities for language tasks. However, the impact of additional visual information on model responses remains insufficiently understood. In this study, we focus on the priming effect, a psychological phenomenon, to investigate how visual information influences language task processing. We present additional intentionally designed images alongside two types of language tasks with different characteristics and analyze changes in the model’s responses. Our experimental results show that model responses shift in the direction intended by the image, suggesting that LVLMs do not simply ignore visual information but actively incorporate it into language processing. Furthermore, the similarity between this behavior and priming effects observed in human cognition suggests that LVLMs may share certain aspects of human cognitive mechanisms.
%U https://aclanthology.org/2025.ranlp-1.160/
%P 1385-1395
Markdown (Informal)
[Visual Priming Effect on Large-scale Vision Language Models](https://aclanthology.org/2025.ranlp-1.160/) (Yoshida et al., RANLP 2025)
ACL
- Daiki Yoshida, Haruki Sakajo, Kazuki Hayashi, Yusuke Sakai, Hidetaka Kamigaito, Katsuhiko Hayashi, and Taro Watanabe. 2025. Visual Priming Effect on Large-scale Vision Language Models. In Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era, pages 1385–1395, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.