% ACL Anthology record 2026.eacl-long.205 (EACL 2026, Volume 1: Long Papers).
@inproceedings{hou-etal-2026-chromic,
    title     = {{CHROMIC}: Chronological Reasoning Across Multi-Panel Comics},
    author    = {Hou, Bingxuan and
                 Lin, Jiayi and
                 Zhang, Chenyang and
                 Yin, Dapeng and
                 Zhu, Shuyue and
                 Hong, Qingqing and
                 Gao, Mengna and
                 Wang, Junli},
    editor    = {Demberg, Vera and
                 Inui, Kentaro and
                 Marquez, Llu{\'i}s},
    booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.eacl-long.205/},
    pages     = {4384--4400},
    isbn      = {979-8-89176-380-7},
    abstract  = {Large-scale vision{--}language models (LVLMs) have achieved remarkable progress on various reasoning tasks. However, most studies focus on natural photographic images and pay limited attention to multi-panel visual narratives such as comics. This leaves a clear gap in our understanding of how well LVLMs perform chronological reasoning across comic panels. To address this, we introduce **ChrOMIC**, a new benchmark dataset for **chro**nological reasoning in multi-panel **comic**s. It covers six types of reasoning questions and spans both Western and Japanese comic styles. To ensure high-quality annotations, we customized a human{--}AI collaborative annotation process tailored to the characteristics of the two comic styles. We further introduce three core tasks: Description Reordering and Panel Reordering, which jointly assess models' ability to understand chronological order in panel sequences, and Multiple-Choice Question Answering (MCQA), which evaluates narrative-level reasoning. We evaluate a range of open-source and commercial LVLMs on ChrOMIC, and find that even the leading models struggle with panel-based chronological reasoning. Further analysis reveals key limitations, including weak visual action understanding and frequent hallucinations in fine-grained visual interpretation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier below. -->
  <mods ID="hou-etal-2026-chromic">
    <titleInfo>
      <title>CHROMIC: Chronological Reasoning Across Multi-Panel Comics</title>
    </titleInfo>

    <!-- Authors, in the order listed. -->
    <name type="personal">
      <namePart type="given">Bingxuan</namePart>
      <namePart type="family">Hou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jiayi</namePart>
      <namePart type="family">Lin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chenyang</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dapeng</namePart>
      <namePart type="family">Yin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shuyue</namePart>
      <namePart type="family">Zhu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Qingqing</namePart>
      <namePart type="family">Hong</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mengna</namePart>
      <namePart type="family">Gao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Junli</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Vera</namePart>
        <namePart type="family">Demberg</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kentaro</namePart>
        <namePart type="family">Inui</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lluís</namePart>
        <namePart type="family">Marquez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-380-7</identifier>
    </relatedItem>

    <abstract>Large-scale vision–language models (LVLMs) have achieved remarkable progress on various reasoning tasks. However, most studies focus on natural photographic images and pay limited attention to multi-panel visual narratives such as comics. This leaves a clear gap in our understanding of how well LVLMs perform chronological reasoning across comic panels. To address this, we introduce **ChrOMIC**, a new benchmark dataset for **chro**nological reasoning in multi-panel **comic**s. It covers six types of reasoning questions and spans both Western and Japanese comic styles. To ensure high-quality annotations, we customized a human–AI collaborative annotation process tailored to the characteristics of the two comic styles. We further introduce three core tasks: Description Reordering and Panel Reordering, which jointly assess models’ ability to understand chronological order in panel sequences, and Multiple-Choice Question Answering (MCQA), which evaluates narrative-level reasoning. We evaluate a range of open-source and commercial LVLMs on ChrOMIC, and find that even the leading models struggle with panel-based chronological reasoning. Further analysis reveals key limitations, including weak visual action understanding and frequent hallucinations in fine-grained visual interpretation.</abstract>
    <identifier type="citekey">hou-etal-2026-chromic</identifier>
    <location>
      <url>https://aclanthology.org/2026.eacl-long.205/</url>
    </location>

    <!-- Page range of the paper. -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>4384</start>
        <end>4400</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T CHROMIC: Chronological Reasoning Across Multi-Panel Comics
%A Hou, Bingxuan
%A Lin, Jiayi
%A Zhang, Chenyang
%A Yin, Dapeng
%A Zhu, Shuyue
%A Hong, Qingqing
%A Gao, Mengna
%A Wang, Junli
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-380-7
%F hou-etal-2026-chromic
%X Large-scale vision–language models (LVLMs) have achieved remarkable progress on various reasoning tasks. However, most studies focus on natural photographic images and pay limited attention to multi-panel visual narratives such as comics. This leaves a clear gap in our understanding of how well LVLMs perform chronological reasoning across comic panels. To address this, we introduce **ChrOMIC**, a new benchmark dataset for **chro**nological reasoning in multi-panel **comic**s. It covers six types of reasoning questions and spans both Western and Japanese comic styles. To ensure high-quality annotations, we customized a human–AI collaborative annotation process tailored to the characteristics of the two comic styles. We further introduce three core tasks: Description Reordering and Panel Reordering, which jointly assess models’ ability to understand chronological order in panel sequences, and Multiple-Choice Question Answering (MCQA), which evaluates narrative-level reasoning. We evaluate a range of open-source and commercial LVLMs on ChrOMIC, and find that even the leading models struggle with panel-based chronological reasoning. Further analysis reveals key limitations, including weak visual action understanding and frequent hallucinations in fine-grained visual interpretation.
%U https://aclanthology.org/2026.eacl-long.205/
%P 4384-4400
Markdown (Informal)
[CHROMIC: Chronological Reasoning Across Multi-Panel Comics](https://aclanthology.org/2026.eacl-long.205/) (Hou et al., EACL 2026)
ACL
- Bingxuan Hou, Jiayi Lin, Chenyang Zhang, Dapeng Yin, Shuyue Zhu, Qingqing Hong, Mengna Gao, and Junli Wang. 2026. CHROMIC: Chronological Reasoning Across Multi-Panel Comics. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), pages 4384–4400, Rabat, Morocco. Association for Computational Linguistics.