% Conference paper in the SemEval-2025 workshop proceedings.
% Accented letters use the {\'x} special-character form so that classic
% BibTeX sorts and labels the names correctly; case-sensitive words in the
% title are protected with whole-word braces.
@inproceedings{creo-etal-2025-cogumelo,
  title     = {{COGUMELO} at {SemEval}-2025 Task 3: A Synthetic Approach to Detecting Hallucinations in Language Models based on Named Entity Recognition},
  author    = {Creo, Aldan and
               Cerezo-Costas, H{\'e}ctor and
               Hormaz{\'a}bal Lagos, Maximiliano and
               Alonso Doval, Pedro},
  editor    = {Rosenthal, Sara and
               Ros{\'a}, Aiala and
               Ghosh, Debanjan and
               Zampieri, Marcos},
  booktitle = {Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.semeval-1.281/},
  pages     = {2170--2176},
  isbn      = {979-8-89176-273-2},
  abstract  = {In this paper, we propose an approach to detecting hallucinations based on a Named Entity Recognition (NER) task. We focus on efficiency, aiming to develop a model that can detect hallucinations without relying on external data sources or expensive computations that involve state-of-the-art large language models with upwards of tens of billions of parameters. We utilize the SQuAD question answering dataset to generate a synthetic version that contains both correct and hallucinated responses and train encoder language models of a moderate size (RoBERTa and FLAN-T5) to predict spans of text that are highly likely to contain a hallucination. We test our models on a separate dataset of expert-annotated question-answer pairs and find that our approach achieves a Jaccard similarity of up to 0.358 and 0.227 Spearman correlation, which suggests that our models can serve as moderately accurate hallucination detectors, ideally as part of a detection pipeline involving human supervision. We also observe that larger models seem to develop an emergent ability to leverage their background knowledge to make more informed decisions, while smaller models seem to take shortcuts that can lead to a higher number of false positives. We make our data and code publicly accessible, along with an online visualizer. We also release our trained models under an open license.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record for one conference paper. The four authors are
       direct children of the record; the four editors, publisher, place and
       ISBN belong to the host item (the proceedings volume). -->
  <mods ID="creo-etal-2025-cogumelo">
    <titleInfo>
      <title>COGUMELO at SemEval-2025 Task 3: A Synthetic Approach to Detecting Hallucinations in Language Models based on Named Entity Recognition</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Aldan</namePart>
      <namePart type="family">Creo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Héctor</namePart>
      <namePart type="family">Cerezo-Costas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Maximiliano</namePart>
      <namePart type="family">Hormazábal Lagos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pedro</namePart>
      <namePart type="family">Alonso Doval</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sara</namePart>
        <namePart type="family">Rosenthal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aiala</namePart>
        <namePart type="family">Rosá</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Debanjan</namePart>
        <namePart type="family">Ghosh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcos</namePart>
        <namePart type="family">Zampieri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-273-2</identifier>
    </relatedItem>
    <abstract>In this paper, we propose an approach to detecting hallucinations based on a Named Entity Recognition (NER) task. We focus on efficiency, aiming to develop a model that can detect hallucinations without relying on external data sources or expensive computations that involve state-of-the-art large language models with upwards of tens of billions of parameters. We utilize the SQuAD question answering dataset to generate a synthetic version that contains both correct and hallucinated responses and train encoder language models of a moderate size (RoBERTa and FLAN-T5) to predict spans of text that are highly likely to contain a hallucination. We test our models on a separate dataset of expert-annotated question-answer pairs and find that our approach achieves a Jaccard similarity of up to 0.358 and 0.227 Spearman correlation, which suggests that our models can serve as moderately accurate hallucination detectors, ideally as part of a detection pipeline involving human supervision. We also observe that larger models seem to develop an emergent ability to leverage their background knowledge to make more informed decisions, while smaller models seem to take shortcuts that can lead to a higher number of false positives. We make our data and code publicly accessible, along with an online visualizer. We also release our trained models under an open license.</abstract>
    <identifier type="citekey">creo-etal-2025-cogumelo</identifier>
    <location>
      <url>https://aclanthology.org/2025.semeval-1.281/</url>
    </location>
    <!-- Page range of the paper within the host volume -->
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>2170</start>
        <end>2176</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T COGUMELO at SemEval-2025 Task 3: A Synthetic Approach to Detecting Hallucinations in Language Models based on Named Entity Recognition
%A Creo, Aldan
%A Cerezo-Costas, Héctor
%A Hormazábal Lagos, Maximiliano
%A Alonso Doval, Pedro
%Y Rosenthal, Sara
%Y Rosá, Aiala
%Y Ghosh, Debanjan
%Y Zampieri, Marcos
%S Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-273-2
%F creo-etal-2025-cogumelo
%X In this paper, we propose an approach to detecting hallucinations based on a Named Entity Recognition (NER) task. We focus on efficiency, aiming to develop a model that can detect hallucinations without relying on external data sources or expensive computations that involve state-of-the-art large language models with upwards of tens of billions of parameters. We utilize the SQuAD question answering dataset to generate a synthetic version that contains both correct and hallucinated responses and train encoder language models of a moderate size (RoBERTa and FLAN-T5) to predict spans of text that are highly likely to contain a hallucination. We test our models on a separate dataset of expert-annotated question-answer pairs and find that our approach achieves a Jaccard similarity of up to 0.358 and 0.227 Spearman correlation, which suggests that our models can serve as moderately accurate hallucination detectors, ideally as part of a detection pipeline involving human supervision. We also observe that larger models seem to develop an emergent ability to leverage their background knowledge to make more informed decisions, while smaller models seem to take shortcuts that can lead to a higher number of false positives. We make our data and code publicly accessible, along with an online visualizer. We also release our trained models under an open license.
%U https://aclanthology.org/2025.semeval-1.281/
%P 2170-2176
Markdown (Informal)
[COGUMELO at SemEval-2025 Task 3: A Synthetic Approach to Detecting Hallucinations in Language Models based on Named Entity Recognition](https://aclanthology.org/2025.semeval-1.281/) (Creo et al., SemEval 2025)
ACL