@inproceedings{maksimov-etal-2024-deeppavlov,
title = "{D}eep{P}avlov at {S}em{E}val-2024 Task 6: Detection of Hallucinations and Overgeneration Mistakes with an Ensemble of Transformer-based Models",
author = "Maksimov, Ivan and
Konovalov, Vasily and
Glinskii, Andrei",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.42",
pages = "274--278",
abstract = "The inclination of large language models (LLMs) to produce mistaken assertions, known as hallucinations, can be problematic. These hallucinations could potentially be harmful since sporadic factual inaccuracies within the generated text might be concealed by the overall coherence of the content, making it immensely challenging for users to identify them. The goal of the SHROOM shared-task is to detect grammatically sound outputs that contain incorrect or unsupported semantic information. Although there are a lot of existing hallucination detectors in generated AI content, we found out that pretrained Natural Language Inference (NLI) models yet exhibit success in detecting hallucinations. Moreover their ensemble outperforms more complicated models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="maksimov-etal-2024-deeppavlov">
<titleInfo>
<title>DeepPavlov at SemEval-2024 Task 6: Detection of Hallucinations and Overgeneration Mistakes with an Ensemble of Transformer-based Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Maksimov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasily</namePart>
<namePart type="family">Konovalov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrei</namePart>
<namePart type="family">Glinskii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The inclination of large language models (LLMs) to produce mistaken assertions, known as hallucinations, can be problematic. These hallucinations could potentially be harmful since sporadic factual inaccuracies within the generated text might be concealed by the overall coherence of the content, making it immensely challenging for users to identify them. The goal of the SHROOM shared-task is to detect grammatically sound outputs that contain incorrect or unsupported semantic information. Although there are a lot of existing hallucination detectors in generated AI content, we found out that pretrained Natural Language Inference (NLI) models yet exhibit success in detecting hallucinations. Moreover their ensemble outperforms more complicated models.</abstract>
<identifier type="citekey">maksimov-etal-2024-deeppavlov</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.42</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>274</start>
<end>278</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DeepPavlov at SemEval-2024 Task 6: Detection of Hallucinations and Overgeneration Mistakes with an Ensemble of Transformer-based Models
%A Maksimov, Ivan
%A Konovalov, Vasily
%A Glinskii, Andrei
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F maksimov-etal-2024-deeppavlov
%X The inclination of large language models (LLMs) to produce mistaken assertions, known as hallucinations, can be problematic. These hallucinations could potentially be harmful since sporadic factual inaccuracies within the generated text might be concealed by the overall coherence of the content, making it immensely challenging for users to identify them. The goal of the SHROOM shared-task is to detect grammatically sound outputs that contain incorrect or unsupported semantic information. Although there are a lot of existing hallucination detectors in generated AI content, we found out that pretrained Natural Language Inference (NLI) models yet exhibit success in detecting hallucinations. Moreover their ensemble outperforms more complicated models.
%U https://aclanthology.org/2024.semeval-1.42
%P 274-278
Markdown (Informal)
[DeepPavlov at SemEval-2024 Task 6: Detection of Hallucinations and Overgeneration Mistakes with an Ensemble of Transformer-based Models](https://aclanthology.org/2024.semeval-1.42) (Maksimov et al., SemEval 2024)
ACL