@inproceedings{bahad-etal-2024-nootnoot,
title = "{N}oot{N}oot At {S}em{E}val-2024 Task 6: Hallucinations and Related Observable Overgeneration Mistakes Detection",
author = "Bahad, Sankalp and
Bhaskar, Yash and
Krishnamurthy, Parameswari",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.139",
doi = "10.18653/v1/2024.semeval-1.139",
pages = "964--968",
abstract = "Semantic hallucinations in neural language gen-eration systems pose a significant challenge tothe reliability and accuracy of natural languageprocessing applications. Current neural mod-els often produce fluent but incorrect outputs,undermining the usefulness of generated text.In this study, we address the task of detectingsemantic hallucinations through the SHROOM(Semantic Hallucinations Real Or Mistakes)dataset, encompassing data from diverse NLGtasks such as definition modeling, machinetranslation, and paraphrase generation. We in-vestigate three methodologies: fine-tuning onlabelled training data, fine-tuning on labelledvalidation data, and a zero-shot approach usingthe Mixtral 8x7b instruct model. Our resultsdemonstrate the effectiveness of these method-ologies in identifying semantic hallucinations,with the zero-shot approach showing compet-itive performance without additional training.Our findings highlight the importance of robustdetection mechanisms for ensuring the accu-racy and reliability of neural language genera-tion systems.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bahad-etal-2024-nootnoot">
<titleInfo>
<title>NootNoot At SemEval-2024 Task 6: Hallucinations and Related Observable Overgeneration Mistakes Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sankalp</namePart>
<namePart type="family">Bahad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yash</namePart>
<namePart type="family">Bhaskar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Parameswari</namePart>
<namePart type="family">Krishnamurthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Semantic hallucinations in neural language gen-eration systems pose a significant challenge tothe reliability and accuracy of natural languageprocessing applications. Current neural mod-els often produce fluent but incorrect outputs,undermining the usefulness of generated text.In this study, we address the task of detectingsemantic hallucinations through the SHROOM(Semantic Hallucinations Real Or Mistakes)dataset, encompassing data from diverse NLGtasks such as definition modeling, machinetranslation, and paraphrase generation. We in-vestigate three methodologies: fine-tuning onlabelled training data, fine-tuning on labelledvalidation data, and a zero-shot approach usingthe Mixtral 8x7b instruct model. Our resultsdemonstrate the effectiveness of these method-ologies in identifying semantic hallucinations,with the zero-shot approach showing compet-itive performance without additional training.Our findings highlight the importance of robustdetection mechanisms for ensuring the accu-racy and reliability of neural language genera-tion systems.</abstract>
<identifier type="citekey">bahad-etal-2024-nootnoot</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.139</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.139</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>964</start>
<end>968</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T NootNoot At SemEval-2024 Task 6: Hallucinations and Related Observable Overgeneration Mistakes Detection
%A Bahad, Sankalp
%A Bhaskar, Yash
%A Krishnamurthy, Parameswari
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F bahad-etal-2024-nootnoot
%X Semantic hallucinations in neural language gen-eration systems pose a significant challenge tothe reliability and accuracy of natural languageprocessing applications. Current neural mod-els often produce fluent but incorrect outputs,undermining the usefulness of generated text.In this study, we address the task of detectingsemantic hallucinations through the SHROOM(Semantic Hallucinations Real Or Mistakes)dataset, encompassing data from diverse NLGtasks such as definition modeling, machinetranslation, and paraphrase generation. We in-vestigate three methodologies: fine-tuning onlabelled training data, fine-tuning on labelledvalidation data, and a zero-shot approach usingthe Mixtral 8x7b instruct model. Our resultsdemonstrate the effectiveness of these method-ologies in identifying semantic hallucinations,with the zero-shot approach showing compet-itive performance without additional training.Our findings highlight the importance of robustdetection mechanisms for ensuring the accu-racy and reliability of neural language genera-tion systems.
%R 10.18653/v1/2024.semeval-1.139
%U https://aclanthology.org/2024.semeval-1.139
%U https://doi.org/10.18653/v1/2024.semeval-1.139
%P 964-968
Markdown (Informal)
[NootNoot At SemEval-2024 Task 6: Hallucinations and Related Observable Overgeneration Mistakes Detection](https://aclanthology.org/2024.semeval-1.139) (Bahad et al., SemEval 2024)
ACL