% HaRMoNEE system-description paper, SemEval-2024 Task 6 (SHROOM).
% Case-sensitive names are brace-protected as whole words so that
% sentence-casing styles keep their capitalisation without breaking kerning.
@inproceedings{obiso-etal-2024-harmonee,
    title     = {{HaRMoNEE} at {SemEval}-2024 Task 6: Tuning-based Approaches to Hallucination Recognition},
    author    = {Obiso, Timothy and
                 Tu, Jingxuan and
                 Pustejovsky, James},
    editor    = {Ojha, Atul Kr. and
                 Do{\u{g}}ru{\"o}z, A. Seza and
                 Tayyar Madabushi, Harish and
                 Da San Martino, Giovanni and
                 Rosenthal, Sara and
                 Ros{\'a}, Aiala},
    booktitle = {Proceedings of the 18th International Workshop on Semantic Evaluation ({SemEval}-2024)},
    month     = jun,
    year      = {2024},
    address   = {Mexico City, Mexico},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.semeval-1.191/},
    doi       = {10.18653/v1/2024.semeval-1.191},
    pages     = {1322--1331},
    abstract  = {This paper presents the Hallucination Recognition Model for New Experiment Evaluation (HaRMoNEE) team's winning ({\#}1) and {\#}10 submissions for SemEval-2024 Task 6: Shared-task on Hallucinations and Related Observable Overgeneration Mistakes (SHROOM)'s two subtasks. This task challenged its participants to design systems to detect hallucinations in Large Language Model (LLM) outputs. Team HaRMoNEE proposes two architectures: (1) fine-tuning an off-the-shelf transformer-based model and (2) prompt tuning large-scale Large Language Models (LLMs). One submission from the fine-tuning approach outperformed all other submissions for the model-aware subtask; one submission from the prompt-tuning approach is the 10th-best submission on the leaderboard for the model-agnostic subtask. Our systems also include pre-processing, system-specific tuning, post-processing, and evaluation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="obiso-etal-2024-harmonee">
<titleInfo>
<title>HaRMoNEE at SemEval-2024 Task 6: Tuning-based Approaches to Hallucination Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Timothy</namePart>
<namePart type="family">Obiso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingxuan</namePart>
<namePart type="family">Tu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A.</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the Hallucination Recognition Model for New Experiment Evaluation (HaRMoNEE) team's winning (#1) and #10 submissions for SemEval-2024 Task 6: Shared-task on Hallucinations and Related Observable Overgeneration Mistakes (SHROOM)'s two subtasks. This task challenged its participants to design systems to detect hallucinations in Large Language Model (LLM) outputs. Team HaRMoNEE proposes two architectures: (1) fine-tuning an off-the-shelf transformer-based model and (2) prompt tuning large-scale Large Language Models (LLMs). One submission from the fine-tuning approach outperformed all other submissions for the model-aware subtask; one submission from the prompt-tuning approach is the 10th-best submission on the leaderboard for the model-agnostic subtask. Our systems also include pre-processing, system-specific tuning, post-processing, and evaluation.</abstract>
<identifier type="citekey">obiso-etal-2024-harmonee</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.191</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.191/</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>1322</start>
<end>1331</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HaRMoNEE at SemEval-2024 Task 6: Tuning-based Approaches to Hallucination Recognition
%A Obiso, Timothy
%A Tu, Jingxuan
%A Pustejovsky, James
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F obiso-etal-2024-harmonee
%X This paper presents the Hallucination Recognition Model for New Experiment Evaluation (HaRMoNEE) team's winning (#1) and #10 submissions for SemEval-2024 Task 6: Shared-task on Hallucinations and Related Observable Overgeneration Mistakes (SHROOM)'s two subtasks. This task challenged its participants to design systems to detect hallucinations in Large Language Model (LLM) outputs. Team HaRMoNEE proposes two architectures: (1) fine-tuning an off-the-shelf transformer-based model and (2) prompt tuning large-scale Large Language Models (LLMs). One submission from the fine-tuning approach outperformed all other submissions for the model-aware subtask; one submission from the prompt-tuning approach is the 10th-best submission on the leaderboard for the model-agnostic subtask. Our systems also include pre-processing, system-specific tuning, post-processing, and evaluation.
%R 10.18653/v1/2024.semeval-1.191
%U https://aclanthology.org/2024.semeval-1.191/
%U https://doi.org/10.18653/v1/2024.semeval-1.191
%P 1322-1331
Markdown (Informal)
[HaRMoNEE at SemEval-2024 Task 6: Tuning-based Approaches to Hallucination Recognition](https://aclanthology.org/2024.semeval-1.191/) (Obiso et al., SemEval 2024)
ACL