@inproceedings{siino-2024-brainllama,
title = "{B}rain{L}lama at {S}em{E}val-2024 Task 6: Prompting Llama to detect hallucinations and related observable overgeneration mistakes",
author = "Siino, Marco",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.14",
doi = "10.18653/v1/2024.semeval-1.14",
pages = "82--87",
abstract = "Participants in the SemEval-2024 Task 6 were tasked with executing binary classification aimed at discerning instances of fluent overgeneration hallucinations across two distinct setups: the model-aware and model-agnostic tracks. That is, participants must detect grammatically sound outputs which contain incorrect or unsupported semantic information, regardless of whether they had access to the model responsible for producing the output or not, within the model-aware and model-agnostic tracks. Two tracks were proposed for the task: a model-aware track, where organizers provided a checkpoint to a model publicly available on HuggingFace for every data point considered, and a model-agnostic track where the organizers do not. In this paper, we discuss the application of a Llama model to address both the tracks. Find the persuasive strategy that a meme employs from a hierarchy of twenty based just on its {``}textual content.{''} Only a portion of the reward is awarded if the technique{'}s ancestor node is chosen. This classification issue is multilabel hierarchical. Our approach reaches an accuracy of 0.62 on the agnostic track and of 0.67 on the aware track.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="siino-2024-brainllama">
<titleInfo>
<title>BrainLlama at SemEval-2024 Task 6: Prompting Llama to detect hallucinations and related observable overgeneration mistakes</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Siino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Participants in the SemEval-2024 Task 6 were tasked with executing binary classification aimed at discerning instances of fluent overgeneration hallucinations across two distinct setups: the model-aware and model-agnostic tracks. That is, participants must detect grammatically sound outputs which contain incorrect or unsupported semantic information, regardless of whether they had access to the model responsible for producing the output or not, within the model-aware and model-agnostic tracks. Two tracks were proposed for the task: a model-aware track, where organizers provided a checkpoint to a model publicly available on HuggingFace for every data point considered, and a model-agnostic track where the organizers do not. In this paper, we discuss the application of a Llama model to address both the tracks. Find the persuasive strategy that a meme employs from a hierarchy of twenty based just on its “textual content.” Only a portion of the reward is awarded if the technique’s ancestor node is chosen. This classification issue is multilabel hierarchical. Our approach reaches an accuracy of 0.62 on the agnostic track and of 0.67 on the aware track.</abstract>
<identifier type="citekey">siino-2024-brainllama</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.14</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.14</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>82</start>
<end>87</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BrainLlama at SemEval-2024 Task 6: Prompting Llama to detect hallucinations and related observable overgeneration mistakes
%A Siino, Marco
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F siino-2024-brainllama
%X Participants in the SemEval-2024 Task 6 were tasked with executing binary classification aimed at discerning instances of fluent overgeneration hallucinations across two distinct setups: the model-aware and model-agnostic tracks. That is, participants must detect grammatically sound outputs which contain incorrect or unsupported semantic information, regardless of whether they had access to the model responsible for producing the output or not, within the model-aware and model-agnostic tracks. Two tracks were proposed for the task: a model-aware track, where organizers provided a checkpoint to a model publicly available on HuggingFace for every data point considered, and a model-agnostic track where the organizers do not. In this paper, we discuss the application of a Llama model to address both the tracks. Find the persuasive strategy that a meme employs from a hierarchy of twenty based just on its “textual content.” Only a portion of the reward is awarded if the technique’s ancestor node is chosen. This classification issue is multilabel hierarchical. Our approach reaches an accuracy of 0.62 on the agnostic track and of 0.67 on the aware track.
%R 10.18653/v1/2024.semeval-1.14
%U https://aclanthology.org/2024.semeval-1.14
%U https://doi.org/10.18653/v1/2024.semeval-1.14
%P 82-87
Markdown (Informal)
[BrainLlama at SemEval-2024 Task 6: Prompting Llama to detect hallucinations and related observable overgeneration mistakes](https://aclanthology.org/2024.semeval-1.14) (Siino, SemEval 2024)
ACL