BibTeX
@inproceedings{son-etal-2025-lightweight,
title = "Lightweight Query Checkpoint: Classifying Faulty User Queries to Mitigate Hallucinations in Large Language Model Question Answering",
author = "Son, Minjoo and
Jang, Jonghak and
Kim, Misuk",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.756/",
doi = "10.18653/v1/2025.findings-acl.756",
pages = "14664--14677",
ISBN = "979-8-89176-256-5",
abstract = "Question Answering (QA) with large language models has shown impressive performance, yet hallucinations still persist, particularly when user queries carry incorrect premises, insufficient context, or linguistic ambiguity. To address this issue, we propose Lightweight Query Checkpoint (LQC), a small classification model that detects verification-required queries before the LLM generates a potentially faulty answer. LQC leverages hidden states extracted from intermediate layers of a smaller-scale, non-instruct-tuned LLM to effectively distinguish queries requiring verification from clear queries. We first systematically define categories of queries that need verification, construct a dataset comprising both defective and clear queries, and train a binary contrastive learning model. Through extensive experiments on various QA datasets, we demonstrate that incorporating LQC into QA pipelines reduces hallucinations while preserving strong answer quality."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="son-etal-2025-lightweight">
<titleInfo>
<title>Lightweight Query Checkpoint: Classifying Faulty User Queries to Mitigate Hallucinations in Large Language Model Question Answering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Minjoo</namePart>
<namePart type="family">Son</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonghak</namePart>
<namePart type="family">Jang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Misuk</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Question Answering (QA) with large language models has shown impressive performance, yet hallucinations still persist, particularly when user queries carry incorrect premises, insufficient context, or linguistic ambiguity. To address this issue, we propose Lightweight Query Checkpoint (LQC), a small classification model that detects verification-required queries before the LLM generates a potentially faulty answer. LQC leverages hidden states extracted from intermediate layers of a smaller-scale, non-instruct-tuned LLM to effectively distinguish queries requiring verification from clear queries. We first systematically define categories of queries that need verification, construct a dataset comprising both defective and clear queries, and train a binary contrastive learning model. Through extensive experiments on various QA datasets, we demonstrate that incorporating LQC into QA pipelines reduces hallucinations while preserving strong answer quality.</abstract>
<identifier type="citekey">son-etal-2025-lightweight</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.756</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.756/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>14664</start>
<end>14677</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Lightweight Query Checkpoint: Classifying Faulty User Queries to Mitigate Hallucinations in Large Language Model Question Answering
%A Son, Minjoo
%A Jang, Jonghak
%A Kim, Misuk
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F son-etal-2025-lightweight
%X Question Answering (QA) with large language models has shown impressive performance, yet hallucinations still persist, particularly when user queries carry incorrect premises, insufficient context, or linguistic ambiguity. To address this issue, we propose Lightweight Query Checkpoint (LQC), a small classification model that detects verification-required queries before the LLM generates a potentially faulty answer. LQC leverages hidden states extracted from intermediate layers of a smaller-scale, non-instruct-tuned LLM to effectively distinguish queries requiring verification from clear queries. We first systematically define categories of queries that need verification, construct a dataset comprising both defective and clear queries, and train a binary contrastive learning model. Through extensive experiments on various QA datasets, we demonstrate that incorporating LQC into QA pipelines reduces hallucinations while preserving strong answer quality.
%R 10.18653/v1/2025.findings-acl.756
%U https://aclanthology.org/2025.findings-acl.756/
%U https://doi.org/10.18653/v1/2025.findings-acl.756
%P 14664-14677
Markdown (Informal)
[Lightweight Query Checkpoint: Classifying Faulty User Queries to Mitigate Hallucinations in Large Language Model Question Answering](https://aclanthology.org/2025.findings-acl.756/) (Son et al., Findings 2025)
ACL
Minjoo Son, Jonghak Jang, and Misuk Kim. 2025. [Lightweight Query Checkpoint: Classifying Faulty User Queries to Mitigate Hallucinations in Large Language Model Question Answering](https://aclanthology.org/2025.findings-acl.756/). In *Findings of the Association for Computational Linguistics: ACL 2025*, pages 14664–14677, Vienna, Austria. Association for Computational Linguistics.
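The abstract above describes LQC as a small probe over hidden states taken from an intermediate layer of a smaller-scale, non-instruct-tuned LLM, trained to separate verification-required queries from clear ones. Below is a minimal sketch of that idea, not the authors' released code: the backbone name (`gpt2`), the layer index, the mean pooling, and the plain linear head are illustrative assumptions standing in for the paper's actual backbone and its binary contrastive objective.

```python
# Hypothetical sketch of the LQC idea -- not the paper's implementation.
# Assumptions: "gpt2" as a stand-in for the smaller-scale, non-instruct-tuned
# LLM; layer 6 as the intermediate layer; mean pooling; a plain linear head
# instead of the paper's binary contrastive training.
import torch
from torch import nn
from transformers import AutoModel, AutoTokenizer

BASE = "gpt2"   # assumed backbone
LAYER = 6       # assumed intermediate layer

tokenizer = AutoTokenizer.from_pretrained(BASE)
tokenizer.pad_token = tokenizer.eos_token
backbone = AutoModel.from_pretrained(BASE, output_hidden_states=True)
backbone.eval()  # the backbone stays frozen; only the small head is trained


def query_embedding(queries: list[str]) -> torch.Tensor:
    """Mean-pool hidden states from one intermediate layer of the frozen LLM."""
    batch = tokenizer(queries, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        out = backbone(**batch)
    hidden = out.hidden_states[LAYER]             # (batch, seq_len, hidden_dim)
    mask = batch["attention_mask"].unsqueeze(-1)  # ignore padding positions
    return (hidden * mask).sum(1) / mask.sum(1)


class QueryCheckpoint(nn.Module):
    """Lightweight binary probe: 1 = verification-required, 0 = clear query."""

    def __init__(self, dim: int):
        super().__init__()
        self.head = nn.Linear(dim, 2)

    def forward(self, feats: torch.Tensor) -> torch.Tensor:
        return self.head(feats)


if __name__ == "__main__":
    probe = QueryCheckpoint(backbone.config.hidden_size)
    feats = query_embedding([
        "Why did the Eiffel Tower move to London in 1990?",   # faulty premise
        "What year did the Eiffel Tower open to the public?",  # clear query
    ])
    flags = probe(feats).argmax(dim=-1)
    print(flags)  # arbitrary until the probe is trained on labeled queries
```

Freezing the backbone and training only a small head over its intermediate representations is what would keep such a checkpoint light enough to run in front of the main QA model, which is the role the abstract assigns to LQC.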