@comment{ACL Anthology record: overview paper of the SciHal25 shared task on hallucination detection, published in the SDP 2025 workshop proceedings.}
@inproceedings{li-etal-2025-overview-scihal25,
  title     = {Overview of the {SciHal25} Shared Task on Hallucination Detection for Scientific Content},
  author    = {Li, Dan and
               Palfi, Bogdan and
               Zhang, Colin and
               Subramanian, Jaiganesh and
               Raudaschl, Adrian and
               Kakita, Yoshiko and
               De Waard, Anita and
               Afzal, Zubair and
               Tsatsaronis, Georgios},
  editor    = {Ghosal, Tirthankar and
               Mayr, Philipp and
               Singh, Amanpreet and
               Naik, Aakanksha and
               Rehm, Georg and
               Freitag, Dayne and
               Li, Dan and
               Schimmler, Sonja and
               De Waard, Anita},
  booktitle = {Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.sdp-1.29/},
  doi       = {10.18653/v1/2025.sdp-1.29},
  pages     = {307--315},
  isbn      = {979-8-89176-265-7},
  abstract  = {This paper provides an overview of the Hallucination Detection for Scientific Content (SciHal) shared task held in the 2025 ACL Scholarly Document Processing workshop. The task invites participants to detect hallucinated claims in answers to research-oriented questions generated by real-world GenAI-powered research assistants. This task is formulated as a multi-label classification problem, each instance consists of a question, an answer, an extracted claim, and supporting reference abstracts. Participants are asked to label claims under two subtasks: (1) coarse-grained detection with labels Entailment, Contradiction, or Unverifiable; and (2) fine-grained detection with a more detailed taxonomy including 8 types. The dataset consists of 500 research-oriented questions collected over one week from a generative assistant tool. These questions were rewritten using GPT-4o and manually reviewed to address potential privacy or commercial concerns. In total, 10,000 reference abstracts were retrieved, and 4,592 claims were extracted from the assistant{'}s answers. Each claim is annotated with hallucination labels. The dataset is divided into 3,592 training, 500 validation, and 500 test instances. Subtask 1 saw 88 submissions across 10 teams while subtask 2 saw 39 submissions across 6 teams, resulting in a total of 5 published technical reports. This paper summarizes the task design, dataset, participation, and key findings.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2025-overview-scihal25">
<titleInfo>
<title>Overview of the SciHal25 Shared Task on Hallucination Detection for Scientific Content</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bogdan</namePart>
<namePart type="family">Palfi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Colin</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jaiganesh</namePart>
<namePart type="family">Subramanian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adrian</namePart>
<namePart type="family">Raudaschl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoshiko</namePart>
<namePart type="family">Kakita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">De Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zubair</namePart>
<namePart type="family">Afzal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georgios</namePart>
<namePart type="family">Tsatsaronis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Mayr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanpreet</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aakanksha</namePart>
<namePart type="family">Naik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Rehm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dayne</namePart>
<namePart type="family">Freitag</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sonja</namePart>
<namePart type="family">Schimmler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">De Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-265-7</identifier>
</relatedItem>
<abstract>This paper provides an overview of the Hallucination Detection for Scientific Content (SciHal) shared task held in the 2025 ACL Scholarly Document Processing workshop. The task invites participants to detect hallucinated claims in answers to research-oriented questions generated by real-world GenAI-powered research assistants. This task is formulated as a multi-label classification problem, each instance consists of a question, an answer, an extracted claim, and supporting reference abstracts. Participants are asked to label claims under two subtasks: (1) coarse-grained detection with labels Entailment, Contradiction, or Unverifiable; and (2) fine-grained detection with a more detailed taxonomy including 8 types. The dataset consists of 500 research-oriented questions collected over one week from a generative assistant tool. These questions were rewritten using GPT-4o and manually reviewed to address potential privacy or commercial concerns. In total, 10,000 reference abstracts were retrieved, and 4,592 claims were extracted from the assistant’s answers. Each claim is annotated with hallucination labels. The dataset is divided into 3,592 training, 500 validation, and 500 test instances. Subtask 1 saw 88 submissions across 10 teams while subtask 2 saw 39 submissions across 6 teams, resulting in a total of 5 published technical reports. This paper summarizes the task design, dataset, participation, and key findings.</abstract>
<identifier type="citekey">li-etal-2025-overview-scihal25</identifier>
<identifier type="doi">10.18653/v1/2025.sdp-1.29</identifier>
<location>
<url>https://aclanthology.org/2025.sdp-1.29/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>307</start>
<end>315</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Overview of the SciHal25 Shared Task on Hallucination Detection for Scientific Content
%A Li, Dan
%A Palfi, Bogdan
%A Zhang, Colin
%A Subramanian, Jaiganesh
%A Raudaschl, Adrian
%A Kakita, Yoshiko
%A De Waard, Anita
%A Afzal, Zubair
%A Tsatsaronis, Georgios
%Y Ghosal, Tirthankar
%Y Mayr, Philipp
%Y Singh, Amanpreet
%Y Naik, Aakanksha
%Y Rehm, Georg
%Y Freitag, Dayne
%Y Li, Dan
%Y Schimmler, Sonja
%Y De Waard, Anita
%S Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-265-7
%F li-etal-2025-overview-scihal25
%X This paper provides an overview of the Hallucination Detection for Scientific Content (SciHal) shared task held in the 2025 ACL Scholarly Document Processing workshop. The task invites participants to detect hallucinated claims in answers to research-oriented questions generated by real-world GenAI-powered research assistants. This task is formulated as a multi-label classification problem, each instance consists of a question, an answer, an extracted claim, and supporting reference abstracts. Participants are asked to label claims under two subtasks: (1) coarse-grained detection with labels Entailment, Contradiction, or Unverifiable; and (2) fine-grained detection with a more detailed taxonomy including 8 types. The dataset consists of 500 research-oriented questions collected over one week from a generative assistant tool. These questions were rewritten using GPT-4o and manually reviewed to address potential privacy or commercial concerns. In total, 10,000 reference abstracts were retrieved, and 4,592 claims were extracted from the assistant’s answers. Each claim is annotated with hallucination labels. The dataset is divided into 3,592 training, 500 validation, and 500 test instances. Subtask 1 saw 88 submissions across 10 teams while subtask 2 saw 39 submissions across 6 teams, resulting in a total of 5 published technical reports. This paper summarizes the task design, dataset, participation, and key findings.
%R 10.18653/v1/2025.sdp-1.29
%U https://aclanthology.org/2025.sdp-1.29/
%U https://doi.org/10.18653/v1/2025.sdp-1.29
%P 307-315
Markdown (Informal)
[Overview of the SciHal25 Shared Task on Hallucination Detection for Scientific Content](https://aclanthology.org/2025.sdp-1.29/) (Li et al., SDP 2025)
ACL
- Dan Li, Bogdan Palfi, Colin Zhang, Jaiganesh Subramanian, Adrian Raudaschl, Yoshiko Kakita, Anita De Waard, Zubair Afzal, and Georgios Tsatsaronis. 2025. Overview of the SciHal25 Shared Task on Hallucination Detection for Scientific Content. In Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025), pages 307–315, Vienna, Austria. Association for Computational Linguistics.