@inproceedings{lin-etal-2025-intellicockpitbench,
title = "{I}ntelli{C}ockpit{B}ench: A Comprehensive Benchmark to Evaluate {VLM}s for Intelligent Cockpit",
author = "Lin, Liang and
Chai, Siyuan and
Wu, Jiahao and
Hu, Hongbing and
Gu, Xiaotao and
Hu, Hao and
Zhang, Fan and
Wang, Wei and
Zhang, Dan",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.798/",
doi = "10.18653/v1/2025.findings-acl.798",
pages = "15453--15475",
ISBN = "979-8-89176-256-5",
abstract = "The integration of sophisticated Vision-Language Models (VLMs) in vehicular systems is revolutionizing vehicle interaction and safety, performing tasks such as Visual Question Answering (VQA). However, a critical gap persists due to the lack of a comprehensive benchmark for multimodal VQA models in vehicular scenarios. To address this, we propose IntelliCockpitBench, a benchmark that encompasses diverse automotive scenarios. It includes images from front, side, and rear cameras, various road types, weather conditions, and interior views, integrating data from both moving and stationary states. Notably, all images and queries in the benchmark are verified for high levels of authenticity, ensuring the data accurately reflects real-world conditions. A sophisticated scoring methodology combining human and model-generated assessments enhances reliability and consistency. Our contributions include a diverse and authentic dataset for automotive VQA and a robust evaluation metric aligning human and machine assessments. All code and data can be found at \url{https://github.com/Lane315/IntelliCockpitBench}."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lin-etal-2025-intellicockpitbench">
<titleInfo>
<title>IntelliCockpitBench: A Comprehensive Benchmark to Evaluate VLMs for Intelligent Cockpit</title>
</titleInfo>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siyuan</namePart>
<namePart type="family">Chai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiahao</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongbing</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaotao</namePart>
<namePart type="family">Gu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hao</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>The integration of sophisticated Vision-Language Models (VLMs) in vehicular systems is revolutionizing vehicle interaction and safety, performing tasks such as Visual Question Answering (VQA). However, a critical gap persists due to the lack of a comprehensive benchmark for multimodal VQA models in vehicular scenarios. To address this, we propose IntelliCockpitBench, a benchmark that encompasses diverse automotive scenarios. It includes images from front, side, and rear cameras, various road types, weather conditions, and interior views, integrating data from both moving and stationary states. Notably, all images and queries in the benchmark are verified for high levels of authenticity, ensuring the data accurately reflects real-world conditions. A sophisticated scoring methodology combining human and model-generated assessments enhances reliability and consistency. Our contributions include a diverse and authentic dataset for automotive VQA and a robust evaluation metric aligning human and machine assessments. All code and data can be found at https://github.com/Lane315/IntelliCockpitBench.</abstract>
<identifier type="citekey">lin-etal-2025-intellicockpitbench</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.798</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.798/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>15453</start>
<end>15475</end>
</extent>
</part>
</mods>
</modsCollection>

%0 Conference Proceedings
%T IntelliCockpitBench: A Comprehensive Benchmark to Evaluate VLMs for Intelligent Cockpit
%A Lin, Liang
%A Chai, Siyuan
%A Wu, Jiahao
%A Hu, Hongbing
%A Gu, Xiaotao
%A Hu, Hao
%A Zhang, Fan
%A Wang, Wei
%A Zhang, Dan
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F lin-etal-2025-intellicockpitbench
%X The integration of sophisticated Vision-Language Models (VLMs) in vehicular systems is revolutionizing vehicle interaction and safety, performing tasks such as Visual Question Answering (VQA). However, a critical gap persists due to the lack of a comprehensive benchmark for multimodal VQA models in vehicular scenarios. To address this, we propose IntelliCockpitBench, a benchmark that encompasses diverse automotive scenarios. It includes images from front, side, and rear cameras, various road types, weather conditions, and interior views, integrating data from both moving and stationary states. Notably, all images and queries in the benchmark are verified for high levels of authenticity, ensuring the data accurately reflects real-world conditions. A sophisticated scoring methodology combining human and model-generated assessments enhances reliability and consistency. Our contributions include a diverse and authentic dataset for automotive VQA and a robust evaluation metric aligning human and machine assessments. All code and data can be found at https://github.com/Lane315/IntelliCockpitBench.
%R 10.18653/v1/2025.findings-acl.798
%U https://aclanthology.org/2025.findings-acl.798/
%U https://doi.org/10.18653/v1/2025.findings-acl.798
%P 15453-15475

Markdown (Informal)

[IntelliCockpitBench: A Comprehensive Benchmark to Evaluate VLMs for Intelligent Cockpit](https://aclanthology.org/2025.findings-acl.798/) (Lin et al., Findings 2025)

ACL

Liang Lin, Siyuan Chai, Jiahao Wu, Hongbing Hu, Xiaotao Gu, Hao Hu, Fan Zhang, Wei Wang, and Dan Zhang. 2025. IntelliCockpitBench: A Comprehensive Benchmark to Evaluate VLMs for Intelligent Cockpit. In Findings of the Association for Computational Linguistics: ACL 2025, pages 15453–15475, Vienna, Austria. Association for Computational Linguistics.