@inproceedings{wu-etal-2025-sharp,
title = "{SHARP}: Steering Hallucination in {LVLM}s via Representation Engineering",
author = "Wu, Junfei and
Ding, Yue and
Liu, Guofan and
Xia, Tianze and
Huang, Ziyue and
Sui, Dianbo and
Liu, Qiang and
Wu, Shu and
Wang, Liang and
Tan, Tieniu",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.725/",
pages = "14357--14372",
ISBN = "979-8-89176-332-6",
abstract = "Despite their impressive capabilities, Large Vision-Language Models (LVLMs) frequently generate responses that are plausible but incorrect or unsupported{---}commonly referred to as hallucinations. In this study, we investigate whether different types of hallucinations are reflected in the model{'}s internal representations by probing their encoded features. We focus on two key causes of hallucination in multimodal reasoning: (1) over-reliance on textual priors and (2) preference for user prompts over conflicting visual evidence{---}factors identified in prior work as frequent and impactful. Our probing results reveal that hallucinations exhibit distinguishable representational patterns, suggesting the potential for a representation-level approach to characterize and mitigate them. Motivated by these findings, we propose Steering HAllucination via RePresentation Engineering (SHARP), a representation-level intervention framework that modulates hallucination-related features during inference. SHARP identifies functional representations responsible for prior-driven biases and visual-context conflicts, and jointly adjusts the model{'}s internal activations in real time. We evaluate our approach extensively on three large vision-language models across multiple benchmarks. Experimental results demonstrate that SHARP effectively reduces hallucinations while preserving the performance and generalization capabilities of LVLMs."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wu-etal-2025-sharp">
<titleInfo>
<title>SHARP: Steering Hallucination in LVLMs via Representation Engineering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Junfei</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guofan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tianze</namePart>
<namePart type="family">Xia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ziyue</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dianbo</namePart>
<namePart type="family">Sui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qiang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shu</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tieniu</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>Despite their impressive capabilities, Large Vision-Language Models (LVLMs) frequently generate responses that are plausible but incorrect or unsupported—commonly referred to as hallucinations. In this study, we investigate whether different types of hallucinations are reflected in the model’s internal representations by probing their encoded features. We focus on two key causes of hallucination in multimodal reasoning: (1) over-reliance on textual priors and (2) preference for user prompts over conflicting visual evidence—factors identified in prior work as frequent and impactful. Our probing results reveal that hallucinations exhibit distinguishable representational patterns, suggesting the potential for a representation-level approach to characterize and mitigate them. Motivated by these findings, we propose Steering HAllucination via RePresentation Engineering (SHARP), a representation-level intervention framework that modulates hallucination-related features during inference. SHARP identifies functional representations responsible for prior-driven biases and visual-context conflicts, and jointly adjusts the model’s internal activations in real time. We evaluate our approach extensively on three large vision-language models across multiple benchmarks. Experimental results demonstrate that SHARP effectively reduces hallucinations while preserving the performance and generalization capabilities of LVLMs.</abstract>
<identifier type="citekey">wu-etal-2025-sharp</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.725/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>14357</start>
<end>14372</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SHARP: Steering Hallucination in LVLMs via Representation Engineering
%A Wu, Junfei
%A Ding, Yue
%A Liu, Guofan
%A Xia, Tianze
%A Huang, Ziyue
%A Sui, Dianbo
%A Liu, Qiang
%A Wu, Shu
%A Wang, Liang
%A Tan, Tieniu
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F wu-etal-2025-sharp
%X Despite their impressive capabilities, Large Vision-Language Models (LVLMs) frequently generate responses that are plausible but incorrect or unsupported—commonly referred to as hallucinations. In this study, we investigate whether different types of hallucinations are reflected in the model’s internal representations by probing their encoded features. We focus on two key causes of hallucination in multimodal reasoning: (1) over-reliance on textual priors and (2) preference for user prompts over conflicting visual evidence—factors identified in prior work as frequent and impactful. Our probing results reveal that hallucinations exhibit distinguishable representational patterns, suggesting the potential for a representation-level approach to characterize and mitigate them. Motivated by these findings, we propose Steering HAllucination via RePresentation Engineering (SHARP), a representation-level intervention framework that modulates hallucination-related features during inference. SHARP identifies functional representations responsible for prior-driven biases and visual-context conflicts, and jointly adjusts the model’s internal activations in real time. We evaluate our approach extensively on three large vision-language models across multiple benchmarks. Experimental results demonstrate that SHARP effectively reduces hallucinations while preserving the performance and generalization capabilities of LVLMs.
%U https://aclanthology.org/2025.emnlp-main.725/
%P 14357-14372
Markdown (Informal)
[SHARP: Steering Hallucination in LVLMs via Representation Engineering](https://aclanthology.org/2025.emnlp-main.725/) (Wu et al., EMNLP 2025)
ACL
- Junfei Wu, Yue Ding, Guofan Liu, Tianze Xia, Ziyue Huang, Dianbo Sui, Qiang Liu, Shu Wu, Liang Wang, and Tieniu Tan. 2025. SHARP: Steering Hallucination in LVLMs via Representation Engineering. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, pages 14357–14372, Suzhou, China. Association for Computational Linguistics.
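
The abstract describes SHARP as a representation-level intervention that adjusts the model's internal activations at inference time. As a rough illustration of that general idea (and not the paper's actual SHARP implementation), the sketch below shows generic inference-time activation steering via a PyTorch forward hook: a precomputed direction is added to a chosen decoder layer's hidden states during generation. The layer index, the hypothetical `model.language_model.model.layers` path, the scaling factor `alpha`, and the way the direction is obtained are all assumptions for illustration only.

```python
# Minimal, generic sketch of inference-time activation steering
# (representation engineering). NOT the SHARP implementation from the paper;
# layer choice, steering-vector construction, and scaling are illustrative.
import torch


def add_steering_hook(layer, steering_vector, alpha=1.0):
    """Register a forward hook that shifts the layer's hidden states
    along a precomputed steering direction at every decoding step."""

    def hook(module, inputs, output):
        # Many decoder layers return a tuple; the hidden states come first.
        hidden = output[0] if isinstance(output, tuple) else output
        shift = alpha * steering_vector.to(hidden.device, hidden.dtype)
        hidden = hidden + shift
        if isinstance(output, tuple):
            return (hidden,) + output[1:]
        return hidden

    return layer.register_forward_hook(hook)


# Hypothetical usage with a LLaVA-style LVLM's language decoder.
# `direction` might be, e.g., the difference of mean activations between
# non-hallucinated and hallucinated examples found by probing.
#
# model = ...  # a loaded vision-language model
# direction = torch.load("steering_direction.pt")   # shape: (hidden_size,)
# handle = add_steering_hook(
#     model.language_model.model.layers[20], direction, alpha=4.0
# )
# ... run generation with the steered model ...
# handle.remove()  # restore the original forward pass
```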