@inproceedings{sadeghi-etal-2024-utebc,
title = "u{T}e{BC}-{NLP} at {S}em{E}val-2024 Task 9: Can {LLM}s be Lateral Thinkers?",
author = "Sadeghi, Pouya and
Abaskohi, Amirhossein and
Yaghoobzadeh, Yadollah",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.251",
doi = "10.18653/v1/2024.semeval-1.251",
pages = "1767--1778",
abstract = "Inspired by human cognition, Jiang et al. 2023 create a benchmark for assessing LLMs{'} lateral thinking{---}thinking outside the box. Building upon this benchmark, we investigate how different prompting methods enhance LLMs{'} performance on this task to reveal their inherent power for outside-the-box thinking ability. Through participating in SemEval-2024, task 9, Sentence Puzzle sub-task, we explore prompt engineering methods: chain of thoughts (CoT) and direct prompting, enhancing with informative descriptions, and employing contextualizing prompts using a retrieval augmented generation (RAG) pipeline. Our experiments involve three LLMs including GPT-3.5, GPT-4, and Zephyr-7B-beta. We generate a dataset of thinking paths between riddles and options using GPT-4, validated by humans for quality. Findings indicate that compressed informative prompts enhance performance. Dynamic in-context learning enhances model performance significantly. Furthermore, fine-tuning Zephyr on our dataset enhances performance across other commonsense datasets, underscoring the value of innovative thinking.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sadeghi-etal-2024-utebc">
<titleInfo>
<title>uTeBC-NLP at SemEval-2024 Task 9: Can LLMs be Lateral Thinkers?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pouya</namePart>
<namePart type="family">Sadeghi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amirhossein</namePart>
<namePart type="family">Abaskohi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yadollah</namePart>
<namePart type="family">Yaghoobzadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Inspired by human cognition, Jiang et al. 2023 create a benchmark for assessing LLMs’ lateral thinking—thinking outside the box. Building upon this benchmark, we investigate how different prompting methods enhance LLMs’ performance on this task to reveal their inherent power for outside-the-box thinking ability. Through participating in SemEval-2024, task 9, Sentence Puzzle sub-task, we explore prompt engineering methods: chain of thoughts (CoT) and direct prompting, enhancing with informative descriptions, and employing contextualizing prompts using a retrieval augmented generation (RAG) pipeline. Our experiments involve three LLMs including GPT-3.5, GPT-4, and Zephyr-7B-beta. We generate a dataset of thinking paths between riddles and options using GPT-4, validated by humans for quality. Findings indicate that compressed informative prompts enhance performance. Dynamic in-context learning enhances model performance significantly. Furthermore, fine-tuning Zephyr on our dataset enhances performance across other commonsense datasets, underscoring the value of innovative thinking.</abstract>
<identifier type="citekey">sadeghi-etal-2024-utebc</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.251</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.251</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>1767</start>
<end>1778</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T uTeBC-NLP at SemEval-2024 Task 9: Can LLMs be Lateral Thinkers?
%A Sadeghi, Pouya
%A Abaskohi, Amirhossein
%A Yaghoobzadeh, Yadollah
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F sadeghi-etal-2024-utebc
%X Inspired by human cognition, Jiang et al. 2023 create a benchmark for assessing LLMs’ lateral thinking—thinking outside the box. Building upon this benchmark, we investigate how different prompting methods enhance LLMs’ performance on this task to reveal their inherent power for outside-the-box thinking ability. Through participating in SemEval-2024, task 9, Sentence Puzzle sub-task, we explore prompt engineering methods: chain of thoughts (CoT) and direct prompting, enhancing with informative descriptions, and employing contextualizing prompts using a retrieval augmented generation (RAG) pipeline. Our experiments involve three LLMs including GPT-3.5, GPT-4, and Zephyr-7B-beta. We generate a dataset of thinking paths between riddles and options using GPT-4, validated by humans for quality. Findings indicate that compressed informative prompts enhance performance. Dynamic in-context learning enhances model performance significantly. Furthermore, fine-tuning Zephyr on our dataset enhances performance across other commonsense datasets, underscoring the value of innovative thinking.
%R 10.18653/v1/2024.semeval-1.251
%U https://aclanthology.org/2024.semeval-1.251
%U https://doi.org/10.18653/v1/2024.semeval-1.251
%P 1767-1778
Markdown (Informal)
[uTeBC-NLP at SemEval-2024 Task 9: Can LLMs be Lateral Thinkers?](https://aclanthology.org/2024.semeval-1.251) (Sadeghi et al., SemEval 2024)
ACL