@inproceedings{gautam-etal-2026-energy,
title = "The Energy of Falsehood: Detecting Hallucinations via Diffusion Model Likelihoods",
author = "Gautam, Arpit Singh and
Talreja, Kailash and
Jha, Saurabh",
editor = "Akhtar, Mubashara and
Aly, Rami and
Cao, Rui and
Christodoulopoulos, Christos and
Cocarascu, Oana and
Guo, Zhijiang and
Mittal, Arpit and
Schlichtkrull, Michael and
Thorne, James and
Vlachos, Andreas",
booktitle = "Proceedings of the Ninth Fact Extraction and {VER}ification Workshop ({FEVER})",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.fever-1.4/",
pages = "47--58",
ISBN = "979-8-89176-365-4",
abstract = "Large Language Models (LLMs) frequently ``hallucinate'' plausible but incorrect assertions, a vulnerability often missed by uncertainty metrics when models are ``confidently wrong.'' We propose DiffuTruth, an unsupervised framework that re-conceptualizes fact verification via non-equilibrium thermodynamics, positing that factual truths act as stable attractors on a generative manifold while hallucinations are unstable. We introduce the ``Generative Stress Test'': claims are corrupted with noise and reconstructed using a discrete text diffusion model. We define Semantic Energy, a metric measuring the semantic divergence between the original claim and its reconstruction using an NLI critic. Unlike vector-space errors, Semantic Energy isolates deep factual contradictions. We further propose a Hybrid Calibration fusing this stability signal with discriminative confidence. Extensive experiments on FEVER demonstrate DiffuTruth achieves a state-of-the-art unsupervised AUROC of 0.725, outperforming baselines by +1.5{\%} through the correction of overconfident predictions. Furthermore, we show superior zero-shot generalization on the multi-hop HOVER dataset, outperforming baselines by over 4{\%}, confirming the robustness of thermodynamic truth properties to distribution shifts."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gautam-etal-2026-energy">
<titleInfo>
<title>The Energy of Falsehood: Detecting Hallucinations via Diffusion Model Likelihoods</title>
</titleInfo>
<name type="personal">
<namePart type="given">Arpit</namePart>
<namePart type="given">Singh</namePart>
<namePart type="family">Gautam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kailash</namePart>
<namePart type="family">Talreja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saurabh</namePart>
<namePart type="family">Jha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth Fact Extraction and VERification Workshop (FEVER)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mubashara</namePart>
<namePart type="family">Akhtar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rami</namePart>
<namePart type="family">Aly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oana</namePart>
<namePart type="family">Cocarascu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhijiang</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arpit</namePart>
<namePart type="family">Mittal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Schlichtkrull</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Thorne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-365-4</identifier>
</relatedItem>
<abstract>Large Language Models (LLMs) frequently “hallucinate” plausible but incorrect assertions, a vulnerability often missed by uncertainty metrics when models are “confidently wrong.” We propose DiffuTruth, an unsupervised framework that re-conceptualizes fact verification via non-equilibrium thermodynamics, positing that factual truths act as stable attractors on a generative manifold while hallucinations are unstable. We introduce the “Generative Stress Test”: claims are corrupted with noise and reconstructed using a discrete text diffusion model. We define Semantic Energy, a metric measuring the semantic divergence between the original claim and its reconstruction using an NLI critic. Unlike vector-space errors, Semantic Energy isolates deep factual contradictions. We further propose a Hybrid Calibration fusing this stability signal with discriminative confidence. Extensive experiments on FEVER demonstrate DiffuTruth achieves a state-of-the-art unsupervised AUROC of 0.725, outperforming baselines by +1.5% through the correction of overconfident predictions. Furthermore, we show superior zero-shot generalization on the multi-hop HOVER dataset, outperforming baselines by over 4%, confirming the robustness of thermodynamic truth properties to distribution shifts.</abstract>
<identifier type="citekey">gautam-etal-2026-energy</identifier>
<location>
<url>https://aclanthology.org/2026.fever-1.4/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>47</start>
<end>58</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Energy of Falsehood: Detecting Hallucinations via Diffusion Model Likelihoods
%A Gautam, Arpit Singh
%A Talreja, Kailash
%A Jha, Saurabh
%Y Akhtar, Mubashara
%Y Aly, Rami
%Y Cao, Rui
%Y Christodoulopoulos, Christos
%Y Cocarascu, Oana
%Y Guo, Zhijiang
%Y Mittal, Arpit
%Y Schlichtkrull, Michael
%Y Thorne, James
%Y Vlachos, Andreas
%S Proceedings of the Ninth Fact Extraction and VERification Workshop (FEVER)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-365-4
%F gautam-etal-2026-energy
%X Large Language Models (LLMs) frequently “hallucinate” plausible but incorrect assertions, a vulnerability often missed by uncertainty metrics when models are “confidently wrong.” We propose DiffuTruth, an unsupervised framework that re-conceptualizes fact verification via non-equilibrium thermodynamics, positing that factual truths act as stable attractors on a generative manifold while hallucinations are unstable. We introduce the “Generative Stress Test”: claims are corrupted with noise and reconstructed using a discrete text diffusion model. We define Semantic Energy, a metric measuring the semantic divergence between the original claim and its reconstruction using an NLI critic. Unlike vector-space errors, Semantic Energy isolates deep factual contradictions. We further propose a Hybrid Calibration fusing this stability signal with discriminative confidence. Extensive experiments on FEVER demonstrate DiffuTruth achieves a state-of-the-art unsupervised AUROC of 0.725, outperforming baselines by +1.5% through the correction of overconfident predictions. Furthermore, we show superior zero-shot generalization on the multi-hop HOVER dataset, outperforming baselines by over 4%, confirming the robustness of thermodynamic truth properties to distribution shifts.
%U https://aclanthology.org/2026.fever-1.4/
%P 47-58
Markdown (Informal)
[The Energy of Falsehood: Detecting Hallucinations via Diffusion Model Likelihoods](https://aclanthology.org/2026.fever-1.4/) (Gautam et al., FEVER 2026)
ACL