@inproceedings{fallah-etal-2024-slpl,
title = "{SLPL} {SHROOM} at {S}em{E}val2024 Task 06 : A comprehensive study on models ability to detect hallucination",
author = "Fallah, Pouya and
Gooran, Soroush and
Jafarinasab, Mohammad and
Sadeghi, Pouya and
Farnia, Reza and
Tarabkhah, Amirreza and
Taghavi, Zeinab Sadat and
Sameti, Hossein",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.167",
doi = "10.18653/v1/2024.semeval-1.167",
pages = "1148--1154",
abstract = "Language models, particularly generative models, are susceptible to hallucinations, generating outputs that contradict factual knowledgeor the source text. This study explores methodsfor detecting hallucinations in three SemEval2024 Task 6 tasks: Machine Translation, Definition Modeling, and Paraphrase Generation.We evaluate two methods: semantic similaritybetween the generated text and factual references, and an ensemble of language modelsthat judge each other{'}s outputs. Our resultsshow that semantic similarity achieves moderate accuracy and correlation scores in trial data,while the ensemble method offers insights intothe complexities of hallucination detection butfalls short of expectations. This work highlights the challenges of hallucination detectionand underscores the need for further researchin this critical area.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fallah-etal-2024-slpl">
<titleInfo>
<title>SLPL SHROOM at SemEval2024 Task 06 : A comprehensive study on models ability to detect hallucination</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pouya</namePart>
<namePart type="family">Fallah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soroush</namePart>
<namePart type="family">Gooran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="family">Jafarinasab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pouya</namePart>
<namePart type="family">Sadeghi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reza</namePart>
<namePart type="family">Farnia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amirreza</namePart>
<namePart type="family">Tarabkhah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeinab</namePart>
<namePart type="given">Sadat</namePart>
<namePart type="family">Taghavi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hossein</namePart>
<namePart type="family">Sameti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Language models, particularly generative models, are susceptible to hallucinations, generating outputs that contradict factual knowledgeor the source text. This study explores methodsfor detecting hallucinations in three SemEval2024 Task 6 tasks: Machine Translation, Definition Modeling, and Paraphrase Generation.We evaluate two methods: semantic similaritybetween the generated text and factual references, and an ensemble of language modelsthat judge each other’s outputs. Our resultsshow that semantic similarity achieves moderate accuracy and correlation scores in trial data,while the ensemble method offers insights intothe complexities of hallucination detection butfalls short of expectations. This work highlights the challenges of hallucination detectionand underscores the need for further researchin this critical area.</abstract>
<identifier type="citekey">fallah-etal-2024-slpl</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.167</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.167</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>1148</start>
<end>1154</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SLPL SHROOM at SemEval2024 Task 06 : A comprehensive study on models ability to detect hallucination
%A Fallah, Pouya
%A Gooran, Soroush
%A Jafarinasab, Mohammad
%A Sadeghi, Pouya
%A Farnia, Reza
%A Tarabkhah, Amirreza
%A Taghavi, Zeinab Sadat
%A Sameti, Hossein
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F fallah-etal-2024-slpl
%X Language models, particularly generative models, are susceptible to hallucinations, generating outputs that contradict factual knowledgeor the source text. This study explores methodsfor detecting hallucinations in three SemEval2024 Task 6 tasks: Machine Translation, Definition Modeling, and Paraphrase Generation.We evaluate two methods: semantic similaritybetween the generated text and factual references, and an ensemble of language modelsthat judge each other’s outputs. Our resultsshow that semantic similarity achieves moderate accuracy and correlation scores in trial data,while the ensemble method offers insights intothe complexities of hallucination detection butfalls short of expectations. This work highlights the challenges of hallucination detectionand underscores the need for further researchin this critical area.
%R 10.18653/v1/2024.semeval-1.167
%U https://aclanthology.org/2024.semeval-1.167
%U https://doi.org/10.18653/v1/2024.semeval-1.167
%P 1148-1154
Markdown (Informal)
[SLPL SHROOM at SemEval2024 Task 06 : A comprehensive study on models ability to detect hallucination](https://aclanthology.org/2024.semeval-1.167) (Fallah et al., SemEval 2024)
ACL
- Pouya Fallah, Soroush Gooran, Mohammad Jafarinasab, Pouya Sadeghi, Reza Farnia, Amirreza Tarabkhah, Zeinab Sadat Taghavi, and Hossein Sameti. 2024. SLPL SHROOM at SemEval2024 Task 06 : A comprehensive study on models ability to detect hallucination. In Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024), pages 1148–1154, Mexico City, Mexico. Association for Computational Linguistics.