@inproceedings{rykov-etal-2025-smurfcat,
title = "{S}murf{C}at at {S}em{E}val-2025 Task 3: Bridging External Knowledge and Model Uncertainty for Enhanced Hallucination Detection",
author = "Rykov, Elisei and
Olisov, Valerii and
Savkin, Maksim and
Vazhentsev, Artem and
Titova, Kseniia and
Panchenko, Alexander and
Konovalov, Vasily and
Belikova, Julia",
editor = "Rosenthal, Sara and
Ros{\'a}, Aiala and
Ghosh, Debanjan and
Zampieri, Marcos",
booktitle = "Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.semeval-1.137/",
pages = "1034--1045",
ISBN = "979-8-89176-273-2",
abstract = "The Multilingual shared-task on Hallucinations and Related Observable Overgeneration Mistakes in the SemEval-2025 competition aims to detect hallucination spans in the outputs of instruction-tuned LLMs in a multilingual context. In this paper, we address the detection of span hallucinations by applying an ensemble of approaches. In particular, we synthesized a PsiloQA dataset and fine-tuned LLM to detect hallucination spans. In addition, we combined this approach with a white-box method based on uncertainty quantification techniques. Using our combined pipeline, we achieved 3rd place in detecting span hallucinations in Arabic, Catalan, Finnish, Italian, and ranked within the top ten for the rest of the languages."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rykov-etal-2025-smurfcat">
<titleInfo>
<title>SmurfCat at SemEval-2025 Task 3: Bridging External Knowledge and Model Uncertainty for Enhanced Hallucination Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elisei</namePart>
<namePart type="family">Rykov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valerii</namePart>
<namePart type="family">Olisov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maksim</namePart>
<namePart type="family">Savkin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artem</namePart>
<namePart type="family">Vazhentsev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kseniia</namePart>
<namePart type="family">Titova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Panchenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasily</namePart>
<namePart type="family">Konovalov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Belikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-273-2</identifier>
</relatedItem>
<abstract>The Multilingual shared-task on Hallucinations and Related Observable Overgeneration Mistakes in the SemEval-2025 competition aims to detect hallucination spans in the outputs of instruction-tuned LLMs in a multilingual context. In this paper, we address the detection of span hallucinations by applying an ensemble of approaches. In particular, we synthesized a PsiloQA dataset and fine-tuned LLM to detect hallucination spans. In addition, we combined this approach with a white-box method based on uncertainty quantification techniques. Using our combined pipeline, we achieved 3rd place in detecting span hallucinations in Arabic, Catalan, Finnish, Italian, and ranked within the top ten for the rest of the languages.</abstract>
<identifier type="citekey">rykov-etal-2025-smurfcat</identifier>
<location>
<url>https://aclanthology.org/2025.semeval-1.137/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>1034</start>
<end>1045</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SmurfCat at SemEval-2025 Task 3: Bridging External Knowledge and Model Uncertainty for Enhanced Hallucination Detection
%A Rykov, Elisei
%A Olisov, Valerii
%A Savkin, Maksim
%A Vazhentsev, Artem
%A Titova, Kseniia
%A Panchenko, Alexander
%A Konovalov, Vasily
%A Belikova, Julia
%Y Rosenthal, Sara
%Y Rosá, Aiala
%Y Ghosh, Debanjan
%Y Zampieri, Marcos
%S Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-273-2
%F rykov-etal-2025-smurfcat
%X The Multilingual shared-task on Hallucinations and Related Observable Overgeneration Mistakes in the SemEval-2025 competition aims to detect hallucination spans in the outputs of instruction-tuned LLMs in a multilingual context. In this paper, we address the detection of span hallucinations by applying an ensemble of approaches. In particular, we synthesized a PsiloQA dataset and fine-tuned LLM to detect hallucination spans. In addition, we combined this approach with a white-box method based on uncertainty quantification techniques. Using our combined pipeline, we achieved 3rd place in detecting span hallucinations in Arabic, Catalan, Finnish, Italian, and ranked within the top ten for the rest of the languages.
%U https://aclanthology.org/2025.semeval-1.137/
%P 1034-1045
Markdown (Informal)
[SmurfCat at SemEval-2025 Task 3: Bridging External Knowledge and Model Uncertainty for Enhanced Hallucination Detection](https://aclanthology.org/2025.semeval-1.137/) (Rykov et al., SemEval 2025)
ACL
- Elisei Rykov, Valerii Olisov, Maksim Savkin, Artem Vazhentsev, Kseniia Titova, Alexander Panchenko, Vasily Konovalov, and Julia Belikova. 2025. SmurfCat at SemEval-2025 Task 3: Bridging External Knowledge and Model Uncertainty for Enhanced Hallucination Detection. In Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025), pages 1034–1045, Vienna, Austria. Association for Computational Linguistics.