@inproceedings{chang-etal-2026-ahead,
title = "{AHEAD}: Attention Head Energy-Aware Dynamics for Hallucination Mitigation in {MLLM}s",
author = "Chang, Jiale and
Li, Ying and
Tang, Siliang and
Zhuang, Yueting",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.425/",
pages = "8728--8739",
ISBN = "979-8-89176-395-1",
abstract = "Multimodal large language models excel at vision-language tasks but remain prone to hallucinations that undermine their reliability. Existing approaches predominantly treat hallucinations as classification errors, overlooking the heterogeneous behaviors of attention heads and their dynamic influences during inference. We revisit MLLM reasoning from an energy perspective and identify that hallucinations stem from imbalances between visual potential and language prior potential: when visual information is ambiguous or language priors dominate, attention heads tend to be driven by linguistic statistical patterns, generating content inconsistent with visual evidence. We propose AHEAD, a framework that quantifies the energetic properties of each attention head during object generation through two potential networks{---}the Visual Grounding Potential Network and the Language Prior Potential Network{---}and dynamically adjusts their contributions at inference time. Specifically, we amplify attention heads with strong visual grounding capacity while suppressing those overly reliant on language priors. Experiments across multiple benchmarks demonstrate that AHEAD significantly reduces hallucination rates without fine-tuning the base MLLM while maintaining generation quality."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chang-etal-2026-ahead">
<titleInfo>
<title>AHEAD: Attention Head Energy-Aware Dynamics for Hallucination Mitigation in MLLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jiale</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ying</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siliang</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yueting</namePart>
<namePart type="family">Zhuang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Multimodal large language models excel at vision-language tasks but remain prone to hallucinations that undermine their reliability. Existing approaches predominantly treat hallucinations as classification errors, overlooking the heterogeneous behaviors of attention heads and their dynamic influences during inference. We revisit MLLM reasoning from an energy perspective and identify that hallucinations stem from imbalances between visual potential and language prior potential: when visual information is ambiguous or language priors dominate, attention heads tend to be driven by linguistic statistical patterns, generating content inconsistent with visual evidence. We propose AHEAD, a framework that quantifies the energetic properties of each attention head during object generation through two potential networks—the Visual Grounding Potential Network and the Language Prior Potential Network—and dynamically adjusts their contributions at inference time. Specifically, we amplify attention heads with strong visual grounding capacity while suppressing those overly reliant on language priors. Experiments across multiple benchmarks demonstrate that AHEAD significantly reduces hallucination rates without fine-tuning the base MLLM while maintaining generation quality.</abstract>
<identifier type="citekey">chang-etal-2026-ahead</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.425/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>8728</start>
<end>8739</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AHEAD: Attention Head Energy-Aware Dynamics for Hallucination Mitigation in MLLMs
%A Chang, Jiale
%A Li, Ying
%A Tang, Siliang
%A Zhuang, Yueting
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F chang-etal-2026-ahead
%X Multimodal large language models excel at vision-language tasks but remain prone to hallucinations that undermine their reliability. Existing approaches predominantly treat hallucinations as classification errors, overlooking the heterogeneous behaviors of attention heads and their dynamic influences during inference. We revisit MLLM reasoning from an energy perspective and identify that hallucinations stem from imbalances between visual potential and language prior potential: when visual information is ambiguous or language priors dominate, attention heads tend to be driven by linguistic statistical patterns, generating content inconsistent with visual evidence. We propose AHEAD, a framework that quantifies the energetic properties of each attention head during object generation through two potential networks—the Visual Grounding Potential Network and the Language Prior Potential Network—and dynamically adjusts their contributions at inference time. Specifically, we amplify attention heads with strong visual grounding capacity while suppressing those overly reliant on language priors. Experiments across multiple benchmarks demonstrate that AHEAD significantly reduces hallucination rates without fine-tuning the base MLLM while maintaining generation quality.
%U https://aclanthology.org/2026.findings-acl.425/
%P 8728-8739
Markdown (Informal)
[AHEAD: Attention Head Energy-Aware Dynamics for Hallucination Mitigation in MLLMs](https://aclanthology.org/2026.findings-acl.425/) (Chang et al., Findings 2026)
ACL