@inproceedings{huang-etal-2025-uir,
    title = "uir-cis at {S}em{E}val-2025 Task 3: Detection of Hallucinations in Generated Text",
    author = "Huang, Jia and
      Zhao, Shuli and
      Zhao, Yaru and
      Chen, Tao and
      Zhao, Weijia and
      Lin, Hangui and
      Chen, Yiyang and
      Li, Binyang",
    editor = "Rosenthal, Sara and
      Ros{\'a}, Aiala and
      Ghosh, Debanjan and
      Zampieri, Marcos",
    booktitle = "Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)",
    month = jul,
    year = "2025",
    address = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.semeval-1.134/",
    pages = "1015--1022",
    ISBN = "979-8-89176-273-2",
    abstract = "The widespread deployment of large language models (LLMs) across diverse domains has underscored the critical need to ensure the credibility and accuracy of their generated content, particularly in the presence of hallucinations. These hallucinations can severely compromise both the practical performance of models and the security of their applications. In response to this issue, SemEval-2025 Task 3, Mu-SHROOM (Multilingual Shared-task on Hallucinations and Related Observable Overgeneration Mistakes), introduces a more granular task for hallucination detection. This task seeks to identify hallucinations in text, accurately locate hallucinated segments, and assess their credibility. In this paper, we present a three-stage method for fine-grained hallucination detection and localization. First, we transform the text into a triplet representation, facilitating more precise hallucination analysis. Next, we leverage a large language model to generate fact-reference texts that correspond to the triplets. Finally, we employ a fact alignment strategy to identify and localize hallucinated segments by evaluating the semantic consistency between the extracted triplets and the generated reference texts. We evaluate our method on the unlabelled test set across all languages in Task 3, demonstrating strong detection performance and validating its effectiveness in multilingual contexts."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="huang-etal-2025-uir">
    <titleInfo>
      <title>uir-cis at SemEval-2025 Task 3: Detection of Hallucinations in Generated Text</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Jia</namePart>
      <namePart type="family">Huang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shuli</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yaru</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tao</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Weijia</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hangui</namePart>
      <namePart type="family">Lin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yiyang</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Binyang</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sara</namePart>
        <namePart type="family">Rosenthal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aiala</namePart>
        <namePart type="family">Rosá</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Debanjan</namePart>
        <namePart type="family">Ghosh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcos</namePart>
        <namePart type="family">Zampieri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-273-2</identifier>
    </relatedItem>
    <abstract>The widespread deployment of large language models (LLMs) across diverse domains has underscored the critical need to ensure the credibility and accuracy of their generated content, particularly in the presence of hallucinations. These hallucinations can severely compromise both the practical performance of models and the security of their applications. In response to this issue, SemEval-2025 Task 3, Mu-SHROOM (Multilingual Shared-task on Hallucinations and Related Observable Overgeneration Mistakes), introduces a more granular task for hallucination detection. This task seeks to identify hallucinations in text, accurately locate hallucinated segments, and assess their credibility. In this paper, we present a three-stage method for fine-grained hallucination detection and localization. First, we transform the text into a triplet representation, facilitating more precise hallucination analysis. Next, we leverage a large language model to generate fact-reference texts that correspond to the triplets. Finally, we employ a fact alignment strategy to identify and localize hallucinated segments by evaluating the semantic consistency between the extracted triplets and the generated reference texts. We evaluate our method on the unlabelled test set across all languages in Task 3, demonstrating strong detection performance and validating its effectiveness in multilingual contexts.</abstract>
    <identifier type="citekey">huang-etal-2025-uir</identifier>
    <location>
      <url>https://aclanthology.org/2025.semeval-1.134/</url>
    </location>
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>1015</start>
        <end>1022</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T uir-cis at SemEval-2025 Task 3: Detection of Hallucinations in Generated Text
%A Huang, Jia
%A Zhao, Shuli
%A Zhao, Yaru
%A Chen, Tao
%A Zhao, Weijia
%A Lin, Hangui
%A Chen, Yiyang
%A Li, Binyang
%Y Rosenthal, Sara
%Y Rosá, Aiala
%Y Ghosh, Debanjan
%Y Zampieri, Marcos
%S Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-273-2
%F huang-etal-2025-uir
%X The widespread deployment of large language models (LLMs) across diverse domains has underscored the critical need to ensure the credibility and accuracy of their generated content, particularly in the presence of hallucinations. These hallucinations can severely compromise both the practical performance of models and the security of their applications. In response to this issue, SemEval-2025 Task 3, Mu-SHROOM (Multilingual Shared-task on Hallucinations and Related Observable Overgeneration Mistakes), introduces a more granular task for hallucination detection. This task seeks to identify hallucinations in text, accurately locate hallucinated segments, and assess their credibility. In this paper, we present a three-stage method for fine-grained hallucination detection and localization. First, we transform the text into a triplet representation, facilitating more precise hallucination analysis. Next, we leverage a large language model to generate fact-reference texts that correspond to the triplets. Finally, we employ a fact alignment strategy to identify and localize hallucinated segments by evaluating the semantic consistency between the extracted triplets and the generated reference texts. We evaluate our method on the unlabelled test set across all languages in Task 3, demonstrating strong detection performance and validating its effectiveness in multilingual contexts.
%U https://aclanthology.org/2025.semeval-1.134/
%P 1015-1022
Markdown (Informal)
[uir-cis at SemEval-2025 Task 3: Detection of Hallucinations in Generated Text](https://aclanthology.org/2025.semeval-1.134/) (Huang et al., SemEval 2025)
ACL
Jia Huang, Shuli Zhao, Yaru Zhao, Tao Chen, Weijia Zhao, Hangui Lin, Yiyang Chen, and Binyang Li. 2025. uir-cis at SemEval-2025 Task 3: Detection of Hallucinations in Generated Text. In Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025), pages 1015–1022, Vienna, Austria. Association for Computational Linguistics.
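
For orientation, the abstract above describes a three-stage pipeline: triplet extraction, LLM-generated fact-reference texts, and fact alignment by semantic consistency. The sketch below illustrates only that control flow under stated assumptions; the `Triplet` shape, the Jaccard token-overlap stand-in for semantic consistency, the 0.5 threshold, and all function names are hypothetical and not taken from the paper.

```python
from dataclasses import dataclass
from typing import Callable

@dataclass
class Triplet:
    subject: str
    relation: str
    obj: str
    span: tuple[int, int]  # character offsets of the originating segment

def consistency(claim: str, reference: str) -> float:
    """Stand-in for the paper's semantic-consistency check: token-overlap
    (Jaccard) score. An embedding or NLI model would replace this."""
    a, b = set(claim.lower().split()), set(reference.lower().split())
    return len(a & b) / len(a | b) if a | b else 0.0

def locate_hallucinations(
    text: str,
    extract_triplets: Callable[[str], list[Triplet]],  # stage 1: triplet extraction (assumed interface)
    generate_reference: Callable[[Triplet], str],      # stage 2: LLM fact-reference generation (assumed interface)
    threshold: float = 0.5,                            # assumed cutoff, not from the paper
) -> list[tuple[tuple[int, int], float]]:
    """Stage 3: fact alignment. Flags spans whose triplet disagrees with its
    generated reference; returns (span, hallucination-score) pairs."""
    flagged = []
    for t in extract_triplets(text):
        claim = f"{t.subject} {t.relation} {t.obj}"
        score = consistency(claim, generate_reference(t))
        if score < threshold:
            flagged.append((t.span, 1.0 - score))
    return flagged

# Toy demo with hard-coded stand-ins for the two model-backed stages.
demo = "Marie Curie won the Nobel Prize in Physics in 1911."
triplets = lambda _: [Triplet("Marie Curie", "won prize in", "Physics 1911", (0, 51))]
reference = lambda t: "Marie Curie won the Nobel Prize in Chemistry in 1911."
print(locate_hallucinations(demo, triplets, reference))  # flags the whole span
```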