% BibTeX record for ACL Anthology paper 2026.findings-acl.890.
@inproceedings{peng-etal-2026-modalities,
  title     = {Not All Modalities at Once: Dynamic Dropout and Bidirectional Fusion for Robust Multi-modal Knowledge Graph Completion},
  author    = {Peng, Jiashun and
               Zhang, Fu and
               Chen, Hongzhi and
               Cheng, Jingwei and
               Ning, Yingsong and
               Wang, Xiaoke},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.890/},
  pages     = {17928--17940},
  isbn      = {979-8-89176-395-1},
  abstract  = {Multi-modal Knowledge Graph Completion (MKGC) aims to infer missing links in multimodal knowledge graphs by leveraging structured triples together with auxiliary modalities such as text and images. Existing MKGC methods typically train with all modalities available, implicitly assuming consistent complementarity; however, this practice often induces modality dependence and modality competition under heterogeneous noise, which can hinder robust multi-modal fusion and limit overall performance. To address these issues, we propose **MDBGF**, a **M**odality **D**ropout and **B**idirectional **G**ated **F**usion framework for MKGC. MDBGF introduces a *dynamic, probability-based modality dropout* schedule. When the dropout is activated, MDBGF drops either the textual or visual modality during training while always preserving the structural information, encouraging the model to reduce over-reliance on any single auxiliary modality and to learn complementary cues under missing-modality conditions. When the dropout is not activated (i.e., all modalities are present), we further design a *bidirectional gated fusion* mechanism that enables mutual modulation between textual and visual modalities, enhancing cross-modal interaction and flexible fusion. In addition, we propose an *adaptive proportional hybrid negative sampling* strategy to strengthen MDBGF{'}s discriminative ability on hard negatives. Experiments on three benchmarks show that MDBGF consistently outperforms existing baselines and achieves new state-of-the-art results. Our code is available at https://anonymous.4open.science/r/MDBGF-AHNS.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for the paper with citekey peng-etal-2026-modalities. -->
  <mods ID="peng-etal-2026-modalities">
    <titleInfo>
      <title>Not All Modalities at Once: Dynamic Dropout and Bidirectional Fusion for Robust Multi-modal Knowledge Graph Completion</title>
    </titleInfo>
    <!-- Paper authors, in the order listed. -->
    <name type="personal">
      <namePart type="given">Jiashun</namePart>
      <namePart type="family">Peng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fu</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hongzhi</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jingwei</namePart>
      <namePart type="family">Cheng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yingsong</namePart>
      <namePart type="family">Ning</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiaoke</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>
    <abstract>Multi-modal Knowledge Graph Completion (MKGC) aims to infer missing links in multimodal knowledge graphs by leveraging structured triples together with auxiliary modalities such as text and images. Existing MKGC methods typically train with all modalities available, implicitly assuming consistent complementarity; however, this practice often induces modality dependence and modality competition under heterogeneous noise, which can hinder robust multi-modal fusion and limit overall performance. To address these issues, we propose **MDBGF**, a **M**odality **D**ropout and **B**idirectional **G**ated **F**usion framework for MKGC. MDBGF introduces a *dynamic, probability-based modality dropout* schedule. When the dropout is activated, MDBGF drops either the textual or visual modality during training while always preserving the structural information, encouraging the model to reduce over-reliance on any single auxiliary modality and to learn complementary cues under missing-modality conditions. When the dropout is not activated (i.e., all modalities are present), we further design a *bidirectional gated fusion* mechanism that enables mutual modulation between textual and visual modalities, enhancing cross-modal interaction and flexible fusion. In addition, we propose an *adaptive proportional hybrid negative sampling* strategy to strengthen MDBGF’s discriminative ability on hard negatives. Experiments on three benchmarks show that MDBGF consistently outperforms existing baselines and achieves new state-of-the-art results. Our code is available at https://anonymous.4open.science/r/MDBGF-AHNS.</abstract>
    <identifier type="citekey">peng-etal-2026-modalities</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.890/</url>
    </location>
    <!-- Position of the paper within the host volume (page range). -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>17928</start>
        <end>17940</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Not All Modalities at Once: Dynamic Dropout and Bidirectional Fusion for Robust Multi-modal Knowledge Graph Completion
%A Peng, Jiashun
%A Zhang, Fu
%A Chen, Hongzhi
%A Cheng, Jingwei
%A Ning, Yingsong
%A Wang, Xiaoke
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F peng-etal-2026-modalities
%X Multi-modal Knowledge Graph Completion (MKGC) aims to infer missing links in multimodal knowledge graphs by leveraging structured triples together with auxiliary modalities such as text and images. Existing MKGC methods typically train with all modalities available, implicitly assuming consistent complementarity; however, this practice often induces modality dependence and modality competition under heterogeneous noise, which can hinder robust multi-modal fusion and limit overall performance. To address these issues, we propose **MDBGF**, a **M**odality **D**ropout and **B**idirectional **G**ated **F**usion framework for MKGC. MDBGF introduces a *dynamic, probability-based modality dropout* schedule. When the dropout is activated, MDBGF drops either the textual or visual modality during training while always preserving the structural information, encouraging the model to reduce over-reliance on any single auxiliary modality and to learn complementary cues under missing-modality conditions. When the dropout is not activated (i.e., all modalities are present), we further design a *bidirectional gated fusion* mechanism that enables mutual modulation between textual and visual modalities, enhancing cross-modal interaction and flexible fusion. In addition, we propose an *adaptive proportional hybrid negative sampling* strategy to strengthen MDBGF’s discriminative ability on hard negatives. Experiments on three benchmarks show that MDBGF consistently outperforms existing baselines and achieves new state-of-the-art results. Our code is available at https://anonymous.4open.science/r/MDBGF-AHNS.
%U https://aclanthology.org/2026.findings-acl.890/
%P 17928-17940
Markdown (Informal)
[Not All Modalities at Once: Dynamic Dropout and Bidirectional Fusion for Robust Multi-modal Knowledge Graph Completion](https://aclanthology.org/2026.findings-acl.890/) (Peng et al., Findings 2026)
ACL