% BibTeX export of the ACL Anthology record for the B-APO paper (Findings of ACL 2026).
% NOTE(review): the fourth author is recorded with family name "Zhaoting" and given
% name "Zhou"; the two parts may be swapped. Confirm against the paper before citing.
@inproceedings{zhao-etal-2026-b,
  title     = {{B-APO}: Bias-Targeted Adversarial Preference Optimization for Debiasing Multimodal Large Language Models},
  author    = {Zhao, Pinlong and
               Ding, Zike and
               Ye, Zengshu and
               Zhaoting, Zhou},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.1843/},
  pages     = {36979--36999},
  isbn      = {979-8-89176-395-1},
  abstract  = {Multimodal Large Language Models (MLLMs) often suffer from modality bias, where the model disproportionately relies on one modality while neglecting critical information from others. Existing debiasing methods via modality masking create biased responses by completely removing an entire modality, forming an extreme and static training environment. However, real-world multimodal bias often emerges under subtle perturbations (e.g., mild occlusion, noisy instructions), where both modalities are present but the model is tempted to rely on spurious shortcuts. We propose B-APO (Bias-Targeted Adversarial Preference Optimization), which casts debiasing as a bias-targeted min-max game: we generate hard negatives by applying small adversarial perturbations in the latent space to maximally induce language-vision-prior reliance, and then perform preference alignment to enlarge the margin between clean and adversarial responses. This encourages the model to anchor on true cross-modal evidence even under the most adversarial conditions. Extensive experiments on bias and hallucination benchmarks demonstrate that B-APO achieves superior debiasing performance while maintaining general capabilities.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey zhao-etal-2026-b: paper-level metadata first, then the host proceedings under relatedItem. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zhao-etal-2026-b">
    <titleInfo>
      <title>B-APO: Bias-Targeted Adversarial Preference Optimization for Debiasing Multimodal Large Language Models</title>
    </titleInfo>
    <!-- Authors, in the order listed in the record. -->
    <name type="personal">
      <namePart type="given">Pinlong</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zike</namePart>
      <namePart type="family">Ding</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zengshu</namePart>
      <namePart type="family">Ye</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhou</namePart>
      <namePart type="family">Zhaoting</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>
    <abstract>Multimodal Large Language Models (MLLMs) often suffer from modality bias, where the model disproportionately relies on one modality while neglecting critical information from others. Existing debiasing methods via modality masking create biased responses by completely removing an entire modality, forming an extreme and static training environment. However, real-world multimodal bias often emerges under subtle perturbations (e.g., mild occlusion, noisy instructions), where both modalities are present but the model is tempted to rely on spurious shortcuts. We propose B-APO (Bias-Targeted Adversarial Preference Optimization), which casts debiasing as a bias-targeted min-max game: we generate hard negatives by applying small adversarial perturbations in the latent space to maximally induce language-vision-prior reliance, and then perform preference alignment to enlarge the margin between clean and adversarial responses. This encourages the model to anchor on true cross-modal evidence even under the most adversarial conditions. Extensive experiments on bias and hallucination benchmarks demonstrate that B-APO achieves superior debiasing performance while maintaining general capabilities.</abstract>
    <identifier type="citekey">zhao-etal-2026-b</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.1843/</url>
    </location>
    <!-- Position of the paper inside the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>36979</start>
        <end>36999</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T B-APO: Bias-Targeted Adversarial Preference Optimization for Debiasing Multimodal Large Language Models
%A Zhao, Pinlong
%A Ding, Zike
%A Ye, Zengshu
%A Zhaoting, Zhou
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F zhao-etal-2026-b
%X Multimodal Large Language Models (MLLMs) often suffer from modality bias, where the model disproportionately relies on one modality while neglecting critical information from others. Existing debiasing methods via modality masking create biased responses by completely removing an entire modality, forming an extreme and static training environment. However, real-world multimodal bias often emerges under subtle perturbations (e.g., mild occlusion, noisy instructions), where both modalities are present but the model is tempted to rely on spurious shortcuts. We propose B-APO (Bias-Targeted Adversarial Preference Optimization), which casts debiasing as a bias-targeted min-max game: we generate hard negatives by applying small adversarial perturbations in the latent space to maximally induce language-vision-prior reliance, and then perform preference alignment to enlarge the margin between clean and adversarial responses. This encourages the model to anchor on true cross-modal evidence even under the most adversarial conditions. Extensive experiments on bias and hallucination benchmarks demonstrate that B-APO achieves superior debiasing performance while maintaining general capabilities.
%U https://aclanthology.org/2026.findings-acl.1843/
%P 36979-36999
Markdown (Informal)
[B-APO: Bias-Targeted Adversarial Preference Optimization for Debiasing Multimodal Large Language Models](https://aclanthology.org/2026.findings-acl.1843/) (Zhao et al., Findings 2026)
ACL