BibTeX
@inproceedings{wang-etal-2025-jailbreak,
title = "Jailbreak Large Vision-Language Models Through Multi-Modal Linkage",
author = "Wang, Yu and
Zhou, Xiaofei and
Wang, Yichen and
Zhang, Geyuan and
He, Tianxing",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.74/",
doi = "10.18653/v1/2025.acl-long.74",
pages = "1466--1494",
ISBN = "979-8-89176-251-0",
abstract = "With the rapid advancement of Large Vision-Language Models (VLMs), concerns about their {\textcompwordmark}potential misuse and abuse have grown rapidly. Prior research has exposed VLMs' vulnerability to jailbreak attacks, where carefully crafted inputs can lead the model to produce content that violates ethical and legal standards. However, current jailbreak methods often fail against cutting-edge models such as GPT-4o. We attribute this to the over-exposure of harmful content and the absence of stealthy malicious guidance. In this work, we introduce a novel jailbreak framework: Multi-Modal Linkage (MML) Attack. Drawing inspiration from cryptography, MML employs an encryption-decryption process across text and image modalities to mitigate the over-exposure of malicious information. To covertly align the model{'}s output with harmful objectives, MML leverages a technique we term evil alignment, framing the attack within the narrative context of a video game development scenario. Extensive experiments validate the effectiveness of MML. Specifically, MML jailbreaks GPT-4o with attack success rates of 99.40{\%} on SafeBench, 98.81{\%} on MM-SafeBench, and 99.07{\%} on HADES-Dataset. Our code is available at https://github.com/wangyu-ovo/MML."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2025-jailbreak">
<titleInfo>
<title>Jailbreak Large Vision-Language Models Through Multi-Modal Linkage</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaofei</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yichen</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Geyuan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tianxing</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>With the rapid advancement of Large Vision-Language Models (VLMs), concerns about their potential misuse and abuse have grown rapidly. Prior research has exposed VLMs’ vulnerability to jailbreak attacks, where carefully crafted inputs can lead the model to produce content that violates ethical and legal standards. However, current jailbreak methods often fail against cutting-edge models such as GPT-4o. We attribute this to the over-exposure of harmful content and the absence of stealthy malicious guidance. In this work, we introduce a novel jailbreak framework: Multi-Modal Linkage (MML) Attack. Drawing inspiration from cryptography, MML employs an encryption-decryption process across text and image modalities to mitigate the over-exposure of malicious information. To covertly align the model’s output with harmful objectives, MML leverages a technique we term evil alignment, framing the attack within the narrative context of a video game development scenario. Extensive experiments validate the effectiveness of MML. Specifically, MML jailbreaks GPT-4o with attack success rates of 99.40% on SafeBench, 98.81% on MM-SafeBench, and 99.07% on HADES-Dataset. Our code is available at https://github.com/wangyu-ovo/MML.</abstract>
<identifier type="citekey">wang-etal-2025-jailbreak</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.74</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.74/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>1466</start>
<end>1494</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Jailbreak Large Vision-Language Models Through Multi-Modal Linkage
%A Wang, Yu
%A Zhou, Xiaofei
%A Wang, Yichen
%A Zhang, Geyuan
%A He, Tianxing
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F wang-etal-2025-jailbreak
%X With the rapid advancement of Large Vision-Language Models (VLMs), concerns about their potential misuse and abuse have grown rapidly. Prior research has exposed VLMs’ vulnerability to jailbreak attacks, where carefully crafted inputs can lead the model to produce content that violates ethical and legal standards. However, current jailbreak methods often fail against cutting-edge models such as GPT-4o. We attribute this to the over-exposure of harmful content and the absence of stealthy malicious guidance. In this work, we introduce a novel jailbreak framework: Multi-Modal Linkage (MML) Attack. Drawing inspiration from cryptography, MML employs an encryption-decryption process across text and image modalities to mitigate the over-exposure of malicious information. To covertly align the model’s output with harmful objectives, MML leverages a technique we term evil alignment, framing the attack within the narrative context of a video game development scenario. Extensive experiments validate the effectiveness of MML. Specifically, MML jailbreaks GPT-4o with attack success rates of 99.40% on SafeBench, 98.81% on MM-SafeBench, and 99.07% on HADES-Dataset. Our code is available at https://github.com/wangyu-ovo/MML.
%R 10.18653/v1/2025.acl-long.74
%U https://aclanthology.org/2025.acl-long.74/
%U https://doi.org/10.18653/v1/2025.acl-long.74
%P 1466-1494
Markdown (Informal)
[Jailbreak Large Vision-Language Models Through Multi-Modal Linkage](https://aclanthology.org/2025.acl-long.74/) (Wang et al., ACL 2025)
ACL
Yu Wang, Xiaofei Zhou, Yichen Wang, Geyuan Zhang, and Tianxing He. 2025. Jailbreak Large Vision-Language Models Through Multi-Modal Linkage. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 1466–1494, Vienna, Austria. Association for Computational Linguistics.
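For reference, a minimal LaTeX sketch of how the BibTeX record above might be used in a paper. It assumes the entry has been saved to a hypothetical file named anthology.bib; the bibliography style is a placeholder (ACL submissions would instead use the style file shipped with the conference template).

\documentclass{article}
\begin{document}
% Cite the paper by the citekey from the BibTeX record above
Multi-modal linkage attacks~\cite{wang-etal-2025-jailbreak} achieve high
attack success rates against GPT-4o.

\bibliographystyle{plain} % placeholder; swap in the ACL template's style
\bibliography{anthology}  % hypothetical anthology.bib containing the entry above
\end{document}

Compiling with pdflatex, then bibtex, then pdflatex twice resolves the citation against the entry.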