@inproceedings{bendahman-etal-2024-irit,
title = "{IRIT}-Berger-Levrault at {S}em{E}val-2024: How Sensitive Sentence Embeddings are to Hallucinations?",
author = "Bendahman, Nihed and
Pinel-sauvagnat, Karen and
Hubert, Gilles and
Billami, Mokhtar",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.86",
doi = "10.18653/v1/2024.semeval-1.86",
pages = "573--578",
abstract = "This article presents our participation to Task 6 of SemEval-2024, named SHROOM (a Shared-task on Hallucinations and Related Observable Overgeneration Mistakes), which aims at detecting hallucinations. We propose two types of approaches for the task: the first one is based on sentence embeddings and cosine similarity metric, and the second one uses LLMs (Large Language Model). We found that LLMs fail to improve the performance achieved by embedding generation models. The latter outperform the baseline provided by the organizers, and our best system achieves 78{\%} accuracy.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bendahman-etal-2024-irit">
<titleInfo>
<title>IRIT-Berger-Levrault at SemEval-2024: How Sensitive Sentence Embeddings are to Hallucinations?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nihed</namePart>
<namePart type="family">Bendahman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karen</namePart>
<namePart type="family">Pinel-sauvagnat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gilles</namePart>
<namePart type="family">Hubert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mokhtar</namePart>
<namePart type="family">Billami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This article presents our participation to Task 6 of SemEval-2024, named SHROOM (a Shared-task on Hallucinations and Related Observable Overgeneration Mistakes), which aims at detecting hallucinations. We propose two types of approaches for the task: the first one is based on sentence embeddings and cosine similarity metric, and the second one uses LLMs (Large Language Model). We found that LLMs fail to improve the performance achieved by embedding generation models. The latter outperform the baseline provided by the organizers, and our best system achieves 78% accuracy.</abstract>
<identifier type="citekey">bendahman-etal-2024-irit</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.86</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.86</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>573</start>
<end>578</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T IRIT-Berger-Levrault at SemEval-2024: How Sensitive Sentence Embeddings are to Hallucinations?
%A Bendahman, Nihed
%A Pinel-sauvagnat, Karen
%A Hubert, Gilles
%A Billami, Mokhtar
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F bendahman-etal-2024-irit
%X This article presents our participation to Task 6 of SemEval-2024, named SHROOM (a Shared-task on Hallucinations and Related Observable Overgeneration Mistakes), which aims at detecting hallucinations. We propose two types of approaches for the task: the first one is based on sentence embeddings and cosine similarity metric, and the second one uses LLMs (Large Language Model). We found that LLMs fail to improve the performance achieved by embedding generation models. The latter outperform the baseline provided by the organizers, and our best system achieves 78% accuracy.
%R 10.18653/v1/2024.semeval-1.86
%U https://aclanthology.org/2024.semeval-1.86
%U https://doi.org/10.18653/v1/2024.semeval-1.86
%P 573-578
Markdown (Informal)
[IRIT-Berger-Levrault at SemEval-2024: How Sensitive Sentence Embeddings are to Hallucinations?](https://aclanthology.org/2024.semeval-1.86) (Bendahman et al., SemEval 2024)
ACL