% Conference paper in the Findings of EMNLP 2023 volume (ACL Anthology ID 2023.findings-emnlp.241).
@inproceedings{loya-etal-2023-exploring,
  author    = {Loya, Manikanta and
               Sinha, Divya and
               Futrell, Richard},
  title     = {Exploring the Sensitivity of {LLM}s{'} Decision-Making Capabilities: Insights from Prompt Variations and Hyperparameters},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  publisher = {Association for Computational Linguistics},
  address   = {Singapore},
  month     = dec,
  year      = {2023},
  pages     = {3711--3716},
  doi       = {10.18653/v1/2023.findings-emnlp.241},
  url       = {https://aclanthology.org/2023.findings-emnlp.241},
  abstract  = {The advancement of Large Language Models (LLMs) has led to their widespread use across a broad spectrum of tasks, including decision-making. Prior studies have compared the decision-making abilities of LLMs with those of humans from a psychological perspective. However, these studies have not always properly accounted for the sensitivity of LLMs{'} behavior to hyperparameters and variations in the prompt. In this study, we examine LLMs{'} performance on the Horizon decision-making task studied by Binz and Schulz (2023), analyzing how LLMs respond to variations in prompts and hyperparameters. By experimenting on three OpenAI language models possessing different capabilities, we observe that the decision-making abilities fluctuate based on the input prompts and temperature settings. Contrary to previous findings, language models display a human-like exploration{--}exploitation tradeoff after simple adjustments to the prompt.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, identified by the citekey loya-etal-2023-exploring. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="loya-etal-2023-exploring">
    <!-- Title of the paper itself -->
    <titleInfo>
      <title>Exploring the Sensitivity of LLMs’ Decision-Making Capabilities: Insights from Prompt Variations and Hyperparameters</title>
    </titleInfo>
    <!-- Authors of the paper (role term: author) -->
    <name type="personal">
      <namePart type="given">Manikanta</namePart>
      <namePart type="family">Loya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Divya</namePart>
      <namePart type="family">Sinha</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Richard</namePart>
      <namePart type="family">Futrell</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- Issue date of the paper, given as year and month -->
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper, with its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="family">Pino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The advancement of Large Language Models (LLMs) has led to their widespread use across a broad spectrum of tasks, including decision-making. Prior studies have compared the decision-making abilities of LLMs with those of humans from a psychological perspective. However, these studies have not always properly accounted for the sensitivity of LLMs’ behavior to hyperparameters and variations in the prompt. In this study, we examine LLMs’ performance on the Horizon decision-making task studied by Binz and Schulz (2023), analyzing how LLMs respond to variations in prompts and hyperparameters. By experimenting on three OpenAI language models possessing different capabilities, we observe that the decision-making abilities fluctuate based on the input prompts and temperature settings. Contrary to previous findings, language models display a human-like exploration–exploitation tradeoff after simple adjustments to the prompt.</abstract>
    <!-- Identifiers: citation key, DOI, and landing-page URL -->
    <identifier type="citekey">loya-etal-2023-exploring</identifier>
    <identifier type="doi">10.18653/v1/2023.findings-emnlp.241</identifier>
    <location>
      <url>https://aclanthology.org/2023.findings-emnlp.241</url>
    </location>
    <!-- Date and page extent of this part -->
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>3711</start>
        <end>3716</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring the Sensitivity of LLMs’ Decision-Making Capabilities: Insights from Prompt Variations and Hyperparameters
%A Loya, Manikanta
%A Sinha, Divya
%A Futrell, Richard
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F loya-etal-2023-exploring
%X The advancement of Large Language Models (LLMs) has led to their widespread use across a broad spectrum of tasks, including decision-making. Prior studies have compared the decision-making abilities of LLMs with those of humans from a psychological perspective. However, these studies have not always properly accounted for the sensitivity of LLMs’ behavior to hyperparameters and variations in the prompt. In this study, we examine LLMs’ performance on the Horizon decision-making task studied by Binz and Schulz (2023), analyzing how LLMs respond to variations in prompts and hyperparameters. By experimenting on three OpenAI language models possessing different capabilities, we observe that the decision-making abilities fluctuate based on the input prompts and temperature settings. Contrary to previous findings, language models display a human-like exploration–exploitation tradeoff after simple adjustments to the prompt.
%R 10.18653/v1/2023.findings-emnlp.241
%U https://aclanthology.org/2023.findings-emnlp.241
%U https://doi.org/10.18653/v1/2023.findings-emnlp.241
%P 3711-3716
Markdown (Informal)
[Exploring the Sensitivity of LLMs’ Decision-Making Capabilities: Insights from Prompt Variations and Hyperparameters](https://aclanthology.org/2023.findings-emnlp.241) (Loya et al., Findings 2023)
ACL