@inproceedings{heerema-etal-2025-raggedyfive,
title = "{R}aggedy{F}ive at {S}em{E}val-2025 Task 3: Hallucination Span Detection Using Unverifiable Answer Detection",
author = "Heerema, Wessel and
Krooneman, Collin and
Van Loon, Simon and
Top, Jelmer and
Voors, Maurice",
editor = "Rosenthal, Sara and
Ros{\'a}, Aiala and
Ghosh, Debanjan and
Zampieri, Marcos",
booktitle = "Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.semeval-1.194/",
pages = "1473--1478",
isbn = "979-8-89176-273-2",
abstract = "Despite their broad utility, large language models (LLMs) are prone to hallucinations. The deviation from provided source inputs or disparateness with factual accuracy makes users question the reliability of LLMs. Therefore, detection systems for LLMs on hallucination are imperative. The system described in this paper detects hallucinated text spans by combining Retrieval-Augmented Generation (RAG) with Natural Language Interface (NLI). While zero-context handling of the RAG had little measurable effect, incorporating the RAG into a natural-language premise for the NLI yielded a noticeable improvement. Discrepancies can be attributed to labeling methodology and the implementation of the RAG."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="heerema-etal-2025-raggedyfive">
<titleInfo>
<title>RaggedyFive at SemEval-2025 Task 3: Hallucination Span Detection Using Unverifiable Answer Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wessel</namePart>
<namePart type="family">Heerema</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Collin</namePart>
<namePart type="family">Krooneman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Van Loon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jelmer</namePart>
<namePart type="family">Top</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maurice</namePart>
<namePart type="family">Voors</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-273-2</identifier>
</relatedItem>
<abstract>Despite their broad utility, large language models (LLMs) are prone to hallucinations. The deviation from provided source inputs or disparateness with factual accuracy makes users question the reliability of LLMs. Therefore, detection systems for LLMs on hallucination are imperative. The system described in this paper detects hallucinated text spans by combining Retrieval-Augmented Generation (RAG) with Natural Language Interface (NLI). While zero-context handling of the RAG had little measurable effect, incorporating the RAG into a natural-language premise for the NLI yielded a noticeable improvement. Discrepancies can be attributed to labeling methodology and the implementation of the RAG.</abstract>
<identifier type="citekey">heerema-etal-2025-raggedyfive</identifier>
<location>
<url>https://aclanthology.org/2025.semeval-1.194/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>1473</start>
<end>1478</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RaggedyFive at SemEval-2025 Task 3: Hallucination Span Detection Using Unverifiable Answer Detection
%A Heerema, Wessel
%A Krooneman, Collin
%A Van Loon, Simon
%A Top, Jelmer
%A Voors, Maurice
%Y Rosenthal, Sara
%Y Rosá, Aiala
%Y Ghosh, Debanjan
%Y Zampieri, Marcos
%S Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-273-2
%F heerema-etal-2025-raggedyfive
%X Despite their broad utility, large language models (LLMs) are prone to hallucinations. The deviation from provided source inputs or disparateness with factual accuracy makes users question the reliability of LLMs. Therefore, detection systems for LLMs on hallucination are imperative. The system described in this paper detects hallucinated text spans by combining Retrieval-Augmented Generation (RAG) with Natural Language Interface (NLI). While zero-context handling of the RAG had little measurable effect, incorporating the RAG into a natural-language premise for the NLI yielded a noticeable improvement. Discrepancies can be attributed to labeling methodology and the implementation of the RAG.
%U https://aclanthology.org/2025.semeval-1.194/
%P 1473-1478
Markdown (Informal)
[RaggedyFive at SemEval-2025 Task 3: Hallucination Span Detection Using Unverifiable Answer Detection](https://aclanthology.org/2025.semeval-1.194/) (Heerema et al., SemEval 2025)
ACL