% Conference-paper record for the citation key han-etal-2026-multi.
% The eighth author is kept as the single token Zhangfengzhen, exactly as the
% name is written in the other records of this file.
% NOTE(review): confirm whether that name should be split into family and given parts.
@inproceedings{han-etal-2026-multi,
  title     = {Multi-Scale Spectral Selection and Entropy-Guided Uncertainty Fusion for Multimodal Rumor Detection},
  author    = {Han, Zongliang and
               Guo, Wenyu and
               Jin, Guoqing and
               Liu, Yang and
               Li, Fan and
               Yu, Dong and
               Song, Yan and
               Zhangfengzhen},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-acl.55/},
  pages     = {1090--1100},
  isbn      = {979-8-89176-395-1},
  abstract  = {Multimodal content combining textual and visual information poses significant challenges for rumor detection on social media. Compared to traditional spatial domain features, frequency domain features have attracted increasing attention due to their stronger discriminative capabilities. However, existing methods still fall short in capturing cross-modal semantic inconsistencies and often overlook inherent noise in multimodal features, which limits overall detection performance. To address these issues, we propose a novel multimodal rumor detection method based on multi-scale spectral selection and entropy-guided uncertainty fusion. Specifically, we first apply the Discrete Cosine Transform (DCT) to image and text features to convert them into the frequency domain. Then, multi-scale convolutional filters are employed to extract fine-grained information across different frequency scales. Next, modality separation is performed to capture both shared and modality-specific features, enabling more effective cross-modal representation learning. Finally, entropy is used to estimate the uncertainty of each prediction branch, calculate confidence scores, and perform adaptive weighted fusion accordingly. Experimental results on multiple benchmark datasets demonstrate that our method outperforms existing state-of-the-art approaches in multimodal rumor detection, demonstrating stronger detection capability and robustness.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single record; its ID equals the citekey identifier further down. -->
  <mods ID="han-etal-2026-multi">
    <titleInfo>
      <title>Multi-Scale Spectral Selection and Entropy-Guided Uncertainty Fusion for Multimodal Rumor Detection</title>
    </titleInfo>
    <!-- Authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Zongliang</namePart>
      <namePart type="family">Han</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wenyu</namePart>
      <namePart type="family">Guo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Guoqing</namePart>
      <namePart type="family">Jin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yang</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fan</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dong</namePart>
      <namePart type="family">Yu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yan</namePart>
      <namePart type="family">Song</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- Last author: one untyped namePart, with no given/family split. -->
    <name>
      <namePart>Zhangfengzhen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the containing volume, with its editors, publisher, place, genre and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>
    <abstract>Multimodal content combining textual and visual information poses significant challenges for rumor detection on social media. Compared to traditional spatial domain features, frequency domain features have attracted increasing attention due to their stronger discriminative capabilities. However, existing methods still fall short in capturing cross-modal semantic inconsistencies and often overlook inherent noise in multimodal features, which limits overall detection performance. To address these issues, we propose a novel multimodal rumor detection method based on multi-scale spectral selection and entropy-guided uncertainty fusion. Specifically, we first apply the Discrete Cosine Transform (DCT) to image and text features to convert them into the frequency domain. Then, multi-scale convolutional filters are employed to extract fine-grained information across different frequency scales. Next, modality separation is performed to capture both shared and modality-specific features, enabling more effective cross-modal representation learning. Finally, entropy is used to estimate the uncertainty of each prediction branch, calculate confidence scores, and perform adaptive weighted fusion accordingly. Experimental results on multiple benchmark datasets demonstrate that our method outperforms existing state-of-the-art approaches in multimodal rumor detection, demonstrating stronger detection capability and robustness.</abstract>
    <identifier type="citekey">han-etal-2026-multi</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.55/</url>
    </location>
    <!-- Issue date and page extent of this record. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>1090</start>
        <end>1100</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-Scale Spectral Selection and Entropy-Guided Uncertainty Fusion for Multimodal Rumor Detection
%A Han, Zongliang
%A Guo, Wenyu
%A Jin, Guoqing
%A Liu, Yang
%A Li, Fan
%A Yu, Dong
%A Song, Yan
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%A Zhangfengzhen
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F han-etal-2026-multi
%X Multimodal content combining textual and visual information poses significant challenges for rumor detection on social media. Compared to traditional spatial domain features, frequency domain features have attracted increasing attention due to their stronger discriminative capabilities. However, existing methods still fall short in capturing cross-modal semantic inconsistencies and often overlook inherent noise in multimodal features, which limits overall detection performance. To address these issues, we propose a novel multimodal rumor detection method based on multi-scale spectral selection and entropy-guided uncertainty fusion. Specifically, we first apply the Discrete Cosine Transform (DCT) to image and text features to convert them into the frequency domain. Then, multi-scale convolutional filters are employed to extract fine-grained information across different frequency scales. Next, modality separation is performed to capture both shared and modality-specific features, enabling more effective cross-modal representation learning. Finally, entropy is used to estimate the uncertainty of each prediction branch, calculate confidence scores, and perform adaptive weighted fusion accordingly. Experimental results on multiple benchmark datasets demonstrate that our method outperforms existing state-of-the-art approaches in multimodal rumor detection, demonstrating stronger detection capability and robustness.
%U https://aclanthology.org/2026.findings-acl.55/
%P 1090-1100
Markdown (Informal)
[Multi-Scale Spectral Selection and Entropy-Guided Uncertainty Fusion for Multimodal Rumor Detection](https://aclanthology.org/2026.findings-acl.55/) (Han et al., Findings 2026)
ACL
- Zongliang Han, Wenyu Guo, Guoqing Jin, Yang Liu, Fan Li, Dong Yu, Yan Song, and Zhangfengzhen. 2026. Multi-Scale Spectral Selection and Entropy-Guided Uncertainty Fusion for Multimodal Rumor Detection. In Findings of the Association for Computational Linguistics: ACL 2026, pages 1090–1100, San Diego, California, United States. Association for Computational Linguistics.