% BibTeX record for ACL Anthology paper 2026.bigpicture-main.3 (text outside an entry is ignored by BibTeX).
@inproceedings{hoblitzell-2026-beyond,
  title     = {Beyond Hallucination: Reframing {LLM} Quality Assessment as Task-Output Alignment},
  author    = {Hoblitzell, Andrew},
  editor    = {Elazar, Yanai and
               Ettinger, Allyson and
               Kassner, Nora and
               Ruder, Sebastian},
  booktitle = {Proceedings of The Big Picture v2: Crafting a Research Narrative},
  month     = jul,
  year      = {2026},
  address   = {San Diego, CA, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.bigpicture-main.3/},
  doi       = {10.18653/v1/2026.bigpicture-main.3},
  pages     = {22--30},
  isbn      = {979-8-89176-416-3},
  abstract  = {Current hallucination detection systems operate under a flawed assumption: that model outputs deviating from factual grounding are uniformly problematic regardless of task context, modality, or cultural setting. Through analysis of computational humor as a motivating case study, we demonstrate that identical model behaviors require radically different evaluations depending on context. We propose reframing hallucination detection as task-output alignment assessment, introducing a three-dimensional framework spanning factual grounding requirements, novelty requirements, and risk tolerance.}
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID "hoblitzell-2026-beyond". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hoblitzell-2026-beyond">
    <!-- The paper itself: title, author, issue date. -->
    <titleInfo>
      <title>Beyond Hallucination: Reframing LLM Quality Assessment as Task-Output Alignment</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Andrew</namePart>
      <namePart type="family">Hoblitzell</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of The Big Picture v2: Crafting a Research Narrative</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yanai</namePart>
        <namePart type="family">Elazar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Allyson</namePart>
        <namePart type="family">Ettinger</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nora</namePart>
        <namePart type="family">Kassner</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sebastian</namePart>
        <namePart type="family">Ruder</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, CA, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-416-3</identifier>
    </relatedItem>
    <abstract>Current hallucination detection systems operate under a flawed assumption: that model outputs deviating from factual grounding are uniformly problematic regardless of task context, modality, or cultural setting. Through analysis of computational humor as a motivating case study, we demonstrate that identical model behaviors require radically different evaluations depending on context. We propose reframing hallucination detection as task-output alignment assessment, introducing a three-dimensional framework spanning factual grounding requirements, novelty requirements, and risk tolerance.</abstract>
    <!-- Identifiers and access location for the paper. -->
    <identifier type="citekey">hoblitzell-2026-beyond</identifier>
    <identifier type="doi">10.18653/v1/2026.bigpicture-main.3</identifier>
    <location>
      <url>https://aclanthology.org/2026.bigpicture-main.3/</url>
    </location>
    <!-- Position of the paper within the host volume (page range). -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>22</start>
        <end>30</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Beyond Hallucination: Reframing LLM Quality Assessment as Task-Output Alignment
%A Hoblitzell, Andrew
%Y Elazar, Yanai
%Y Ettinger, Allyson
%Y Kassner, Nora
%Y Ruder, Sebastian
%S Proceedings of The Big Picture v2: Crafting a Research Narrative
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, CA, USA
%@ 979-8-89176-416-3
%F hoblitzell-2026-beyond
%X Current hallucination detection systems operate under a flawed assumption: that model outputs deviating from factual grounding are uniformly problematic regardless of task context, modality, or cultural setting. Through analysis of computational humor as a motivating case study, we demonstrate that identical model behaviors require radically different evaluations depending on context. We propose reframing hallucination detection as task-output alignment assessment, introducing a three-dimensional framework spanning factual grounding requirements, novelty requirements, and risk tolerance.
%R 10.18653/v1/2026.bigpicture-main.3
%U https://aclanthology.org/2026.bigpicture-main.3/
%U https://doi.org/10.18653/v1/2026.bigpicture-main.3
%P 22-30
Markdown (Informal)
[Beyond Hallucination: Reframing LLM Quality Assessment as Task-Output Alignment](https://aclanthology.org/2026.bigpicture-main.3/) (Hoblitzell, BigPicture 2026)
ACL