@inproceedings{cao-etal-2025-detecting,
title = "Detecting Hallucinations in Scientific Claims by Combining Prompting Strategies and Internal State Classification",
author = "Cao, Yupeng and
Yu, Chun-Nam and
Subbalakshmi, K.p.",
editor = "Ghosal, Tirthankar and
Mayr, Philipp and
Singh, Amanpreet and
Naik, Aakanksha and
Rehm, Georg and
Freitag, Dayne and
Li, Dan and
Schimmler, Sonja and
De Waard, Anita",
booktitle = "Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.sdp-1.30/",
doi = "10.18653/v1/2025.sdp-1.30",
pages = "316--327",
ISBN = "979-8-89176-265-7",
abstract = "Large Language Model (LLM){--}based research assistant tools demonstrate impressive capabilities, yet their outputs may contain hallucinations that compromise reliability. Therefore, detecting hallucinations in automatically generated scientific content is essential. SciHal2025: Hallucination Detection for Scientific Content challenge @ ACL 2025 provides a valuable platform for advancing this goal. This paper presents our solution to the SciHal2025 challenge. Our method combines several prompting strategies with the fine-tuned base LLMs. We first benchmark multiple LLMs on the SciHal dataset. Next, we developed a detection pipeline that integrates few-shot and chain-of-thought prompting. Hidden representations extracted from the LLMs serve as features for an auxiliary classifier, further improving accuracy. Finally, we fine-tuned the selected base LLMs to enhance end-to-end performance. In this paper, we present comprehensive experimental results and discuss the implications of our findings for future hallucination detection research for scientific content."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cao-etal-2025-detecting">
<titleInfo>
<title>Detecting Hallucinations in Scientific Claims by Combining Prompting Strategies and Internal State Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yupeng</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chun-Nam</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">K.p.</namePart>
<namePart type="family">Subbalakshmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Mayr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanpreet</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aakanksha</namePart>
<namePart type="family">Naik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Rehm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dayne</namePart>
<namePart type="family">Freitag</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sonja</namePart>
<namePart type="family">Schimmler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">De Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-265-7</identifier>
</relatedItem>
<abstract>Large Language Model (LLM)–based research assistant tools demonstrate impressive capabilities, yet their outputs may contain hallucinations that compromise reliability. Therefore, detecting hallucinations in automatically generated scientific content is essential. SciHal2025: Hallucination Detection for Scientific Content challenge @ ACL 2025 provides a valuable platform for advancing this goal. This paper presents our solution to the SciHal2025 challenge. Our method combines several prompting strategies with the fine-tuned base LLMs. We first benchmark multiple LLMs on the SciHal dataset. Next, we developed a detection pipeline that integrates few-shot and chain-of-thought prompting. Hidden representations extracted from the LLMs serve as features for an auxiliary classifier, further improving accuracy. Finally, we fine-tuned the selected base LLMs to enhance end-to-end performance. In this paper, we present comprehensive experimental results and discuss the implications of our findings for future hallucination detection research for scientific content.</abstract>
<identifier type="citekey">cao-etal-2025-detecting</identifier>
<identifier type="doi">10.18653/v1/2025.sdp-1.30</identifier>
<location>
<url>https://aclanthology.org/2025.sdp-1.30/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>316</start>
<end>327</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detecting Hallucinations in Scientific Claims by Combining Prompting Strategies and Internal State Classification
%A Cao, Yupeng
%A Yu, Chun-Nam
%A Subbalakshmi, K.p.
%Y Ghosal, Tirthankar
%Y Mayr, Philipp
%Y Singh, Amanpreet
%Y Naik, Aakanksha
%Y Rehm, Georg
%Y Freitag, Dayne
%Y Li, Dan
%Y Schimmler, Sonja
%Y De Waard, Anita
%S Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-265-7
%F cao-etal-2025-detecting
%X Large Language Model (LLM)–based research assistant tools demonstrate impressive capabilities, yet their outputs may contain hallucinations that compromise reliability. Therefore, detecting hallucinations in automatically generated scientific content is essential. SciHal2025: Hallucination Detection for Scientific Content challenge @ ACL 2025 provides a valuable platform for advancing this goal. This paper presents our solution to the SciHal2025 challenge. Our method combines several prompting strategies with the fine-tuned base LLMs. We first benchmark multiple LLMs on the SciHal dataset. Next, we developed a detection pipeline that integrates few-shot and chain-of-thought prompting. Hidden representations extracted from the LLMs serve as features for an auxiliary classifier, further improving accuracy. Finally, we fine-tuned the selected base LLMs to enhance end-to-end performance. In this paper, we present comprehensive experimental results and discuss the implications of our findings for future hallucination detection research for scientific content.
%R 10.18653/v1/2025.sdp-1.30
%U https://aclanthology.org/2025.sdp-1.30/
%U https://doi.org/10.18653/v1/2025.sdp-1.30
%P 316-327
Markdown (Informal)
[Detecting Hallucinations in Scientific Claims by Combining Prompting Strategies and Internal State Classification](https://aclanthology.org/2025.sdp-1.30/) (Cao et al., sdp 2025)
ACL