% ACL 2026 long-paper entry (Anthology ID 2026.acl-long.1116).
% Whole-word braces in title/booktitle keep proper nouns and the benchmark name
% from being lower-cased by styles that apply sentence casing.
@inproceedings{xu-etal-2026-janusmm,
  title     = {{JanusMM}: A Benchmark for Self-Deprecation Understanding in Real-World Multimodal Conversations},
  author    = {Xu, Xinyi and
               Hao, Bingguang and
               Xiong, Yongyi and
               Chen, Zimo and
               Liu, Xinchen and
               Guo, Hongxin and
               Wang, Xuelong and
               Zhou, Silin and
               Dou, Shihan},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-long.1116/},
  pages     = {24324--24343},
  isbn      = {979-8-89176-390-6},
  abstract  = {Self-deprecation is a prevalent communicative strategy in human society, often using image-text interplay to express emotions and intentions. Despite self-deprecation is widespread in real-world conversations, the ability of multimodal large language models (MLLMs) to understand it remains underexplored. To fill this gap, we introduce JanusMM, the first benchmark designed to evaluate MLLMs' understanding of self-deprecation in real-world conversations. JanusMM contains 2,016 bilingual memes from three types of social interactions and provides a dual-task evaluation framework with six new metrics. The first task assesses MLLMs' abilities in self-deprecation recognition and reasoning, while the second task evaluates the consistency of their understanding by simulating the perspectives of the initiator and responder. We evaluate ten frontier MLLMs and find that they exhibit weak recognition and reasoning abilities, with their understanding of self-deprecation remaining inconsistent across both perspectives.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey xu-etal-2026-janusmm: paper title, nine authors,
     the host proceedings (with its four editors, publisher and ISBN), abstract,
     URL and page extent. The abstract is plain text, so no inline markup is used. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="xu-etal-2026-janusmm">
    <titleInfo>
      <title>JanusMM: A Benchmark for Self-Deprecation Understanding in Real-World Multimodal Conversations</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Xinyi</namePart>
      <namePart type="family">Xu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bingguang</namePart>
      <namePart type="family">Hao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yongyi</namePart>
      <namePart type="family">Xiong</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zimo</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xinchen</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hongxin</namePart>
      <namePart type="family">Guo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xuelong</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Silin</namePart>
      <namePart type="family">Zhou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shihan</namePart>
      <namePart type="family">Dou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-390-6</identifier>
    </relatedItem>
    <abstract>Self-deprecation is a prevalent communicative strategy in human society, often using image-text interplay to express emotions and intentions. Despite self-deprecation is widespread in real-world conversations, the ability of multimodal large language models (MLLMs) to understand it remains underexplored. To fill this gap, we introduce JanusMM, the first benchmark designed to evaluate MLLMs’ understanding of self-deprecation in real-world conversations. JanusMM contains 2,016 bilingual memes from three types of social interactions and provides a dual-task evaluation framework with six new metrics. The first task assesses MLLMs’ abilities in self-deprecation recognition and reasoning, while the second task evaluates the consistency of their understanding by simulating the perspectives of the initiator and responder. We evaluate ten frontier MLLMs and find that they exhibit weak recognition and reasoning abilities, with their understanding of self-deprecation remaining inconsistent across both perspectives.</abstract>
    <identifier type="citekey">xu-etal-2026-janusmm</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-long.1116/</url>
    </location>
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>24324</start>
        <end>24343</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T JanusMM: A Benchmark for Self-Deprecation Understanding in Real-World Multimodal Conversations
%A Xu, Xinyi
%A Hao, Bingguang
%A Xiong, Yongyi
%A Chen, Zimo
%A Liu, Xinchen
%A Guo, Hongxin
%A Wang, Xuelong
%A Zhou, Silin
%A Dou, Shihan
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F xu-etal-2026-janusmm
%X Self-deprecation is a prevalent communicative strategy in human society, often using image-text interplay to express emotions and intentions. Despite self-deprecation is widespread in real-world conversations, the ability of multimodal large language models (MLLMs) to understand it remains underexplored. To fill this gap, we introduce JanusMM, the first benchmark designed to evaluate MLLMs’ understanding of self-deprecation in real-world conversations. JanusMM contains 2,016 bilingual memes from three types of social interactions and provides a dual-task evaluation framework with six new metrics. The first task assesses MLLMs’ abilities in self-deprecation recognition and reasoning, while the second task evaluates the consistency of their understanding by simulating the perspectives of the initiator and responder. We evaluate ten frontier MLLMs and find that they exhibit weak recognition and reasoning abilities, with their understanding of self-deprecation remaining inconsistent across both perspectives.
%U https://aclanthology.org/2026.acl-long.1116/
%P 24324-24343
Markdown (Informal)
[JanusMM: A Benchmark for Self-Deprecation Understanding in Real-World Multimodal Conversations](https://aclanthology.org/2026.acl-long.1116/) (Xu et al., ACL 2026)
ACL
- Xinyi Xu, Bingguang Hao, Yongyi Xiong, Zimo Chen, Xinchen Liu, Hongxin Guo, Xuelong Wang, Silin Zhou, and Shihan Dou. 2026. JanusMM: A Benchmark for Self-Deprecation Understanding in Real-World Multimodal Conversations. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 24324–24343, San Diego, California, United States. Association for Computational Linguistics.