% Conference paper record (EACL 2026 Student Research Workshop, ACL Anthology export).
% NOTE(review): the second author is stored as family="Duhyeong", given="Baek";
% this may be a family/given swap in the upstream metadata -- confirm with the
% authors before changing it, since all export formats below share the same value.
@inproceedings{lee-etal-2026-hospitality,
    title = "Hospitality-{VQA}: Decision-Oriented Informativeness Evaluation for Vision{--}Language Models",
    author = "Lee, Jeongwoo and
      Duhyeong, Baek and
      Han, Eungyeol and
      Shin, Soyeon and
      Han, Gukin and
      Kim, Seungduk and
      Jeon, Jaehyun and
      Jeong, Taewoo",
    editor = "Baez Santamaria, Selene and
      Somayajula, Sai Ashish and
      Yamaguchi, Atsuki",
    booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 4: Student Research Workshop)",
    month = mar,
    year = "2026",
    address = "Rabat, Morocco",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.eacl-srw.68/",
    pages = "921--936",
    ISBN = "979-8-89176-383-8",
    abstract = "Recent advances in Vision{--}Language Models (VLMs) have demonstrated impressive multimodal understanding in general domains. However, their applicability to decision-oriented domains such as hospitality remains largely unexplored. In this work, we investigate how well VLMs can perform visual question answering (VQA) about hotel and facility images that are central to consumer decision-making. While many existing VQA benchmarks focus on factual correctness, they rarely capture what information users actually find useful. To address this, we first introduce \textit{Informativeness} as a formal framework to quantify how much hospitality-relevant information an image{--}question pair provides. Guided by this framework, we construct a new hospitality-specific VQA dataset that covers various facility types, where questions are specifically designed to reflect key user information needs. Using this benchmark, we conduct experiments with several state-of-the-art VLMs, revealing that VLMs are not intrinsically decision-aware{---}key visual signals remain underutilized, and reliable informativeness reasoning emerges only after modest domain-specific finetuning."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lee-etal-2026-hospitality">
<titleInfo>
<title>Hospitality-VQA: Decision-Oriented Informativeness Evaluation for Vision–Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jeongwoo</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Baek</namePart>
<namePart type="family">Duhyeong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eungyeol</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soyeon</namePart>
<namePart type="family">Shin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gukin</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seungduk</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jaehyun</namePart>
<namePart type="family">Jeon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taewoo</namePart>
<namePart type="family">Jeong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Selene</namePart>
<namePart type="family">Baez Santamaria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sai</namePart>
<namePart type="given">Ashish</namePart>
<namePart type="family">Somayajula</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atsuki</namePart>
<namePart type="family">Yamaguchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-383-8</identifier>
</relatedItem>
<abstract>Recent advances in Vision–Language Models (VLMs) have demonstrated impressive multimodal understanding in general domains. However, their applicability to decision-oriented domains such as hospitality remains largely unexplored. In this work, we investigate how well VLMs can perform visual question answering (VQA) about hotel and facility images that are central to consumer decision-making. While many existing VQA benchmarks focus on factual correctness, they rarely capture what information users actually find useful. To address this, we first introduce Informativeness as a formal framework to quantify how much hospitality-relevant information an image–question pair provides. Guided by this framework, we construct a new hospitality-specific VQA dataset that covers various facility types, where questions are specifically designed to reflect key user information needs. Using this benchmark, we conduct experiments with several state-of-the-art VLMs, revealing that VLMs are not intrinsically decision-aware—key visual signals remain underutilized, and reliable informativeness reasoning emerges only after modest domain-specific finetuning.</abstract>
<identifier type="citekey">lee-etal-2026-hospitality</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-srw.68/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>921</start>
<end>936</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Hospitality-VQA: Decision-Oriented Informativeness Evaluation for Vision–Language Models
%A Lee, Jeongwoo
%A Duhyeong, Baek
%A Han, Eungyeol
%A Shin, Soyeon
%A Han, Gukin
%A Kim, Seungduk
%A Jeon, Jaehyun
%A Jeong, Taewoo
%Y Baez Santamaria, Selene
%Y Somayajula, Sai Ashish
%Y Yamaguchi, Atsuki
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-383-8
%F lee-etal-2026-hospitality
%X Recent advances in Vision–Language Models (VLMs) have demonstrated impressive multimodal understanding in general domains. However, their applicability to decision-oriented domains such as hospitality remains largely unexplored. In this work, we investigate how well VLMs can perform visual question answering (VQA) about hotel and facility images that are central to consumer decision-making. While many existing VQA benchmarks focus on factual correctness, they rarely capture what information users actually find useful. To address this, we first introduce Informativeness as a formal framework to quantify how much hospitality-relevant information an image–question pair provides. Guided by this framework, we construct a new hospitality-specific VQA dataset that covers various facility types, where questions are specifically designed to reflect key user information needs. Using this benchmark, we conduct experiments with several state-of-the-art VLMs, revealing that VLMs are not intrinsically decision-aware—key visual signals remain underutilized, and reliable informativeness reasoning emerges only after modest domain-specific finetuning.
%U https://aclanthology.org/2026.eacl-srw.68/
%P 921-936
Markdown (Informal)
[Hospitality-VQA: Decision-Oriented Informativeness Evaluation for Vision–Language Models](https://aclanthology.org/2026.eacl-srw.68/) (Lee et al., EACL 2026)
ACL
- Jeongwoo Lee, Baek Duhyeong, Eungyeol Han, Soyeon Shin, Gukin Han, Seungduk Kim, Jaehyun Jeon, and Taewoo Jeong. 2026. Hospitality-VQA: Decision-Oriented Informativeness Evaluation for Vision–Language Models. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 4: Student Research Workshop), pages 921–936, Rabat, Morocco. Association for Computational Linguistics.