@inproceedings{goldstein-etal-2023-decoding,
title = "Decoding Stumpers: Large Language Models vs. Human Problem-Solvers",
author = "Goldstein, Alon and
Havin, Miriam and
Reichart, Roi and
Goldstein, Ariel",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.779",
doi = "10.18653/v1/2023.findings-emnlp.779",
pages = "11644--11653",
abstract = "This paper investigates the problem-solving capabilities of Large Language Models (LLMs) by evaluating their performance on stumpers, unique single-step intuition problems that pose challenges for human solvers but are easily verifiable. We compare the performance of four state-of-the-art LLMs (Davinci-2, Davinci-3, GPT-3.5-Turbo, GPT-4) to human participants. Our findings reveal that the new-generation LLMs excel in solving stumpers and surpass human performance. However, humans exhibit superior skills in verifying solutions to the same problems. This research enhances our understanding of LLMs{'} cognitive abilities and provides insights for enhancing their problem-solving potential across various domains.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="goldstein-etal-2023-decoding">
  <titleInfo>
    <title>Decoding Stumpers: Large Language Models vs. Human Problem-Solvers</title>
  </titleInfo>
  <name type="personal">
    <namePart type="given">Alon</namePart>
    <namePart type="family">Goldstein</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Miriam</namePart>
    <namePart type="family">Havin</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Roi</namePart>
    <namePart type="family">Reichart</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Ariel</namePart>
    <namePart type="family">Goldstein</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <originInfo>
    <dateIssued>2023-12</dateIssued>
  </originInfo>
  <typeOfResource>text</typeOfResource>
  <relatedItem type="host">
    <titleInfo>
      <title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Houda</namePart>
      <namePart type="family">Bouamor</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Juan</namePart>
      <namePart type="family">Pino</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kalika</namePart>
      <namePart type="family">Bali</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <originInfo>
      <publisher>Association for Computational Linguistics</publisher>
      <place>
        <placeTerm type="text">Singapore</placeTerm>
      </place>
    </originInfo>
    <genre authority="marcgt">conference publication</genre>
  </relatedItem>
  <abstract>This paper investigates the problem-solving capabilities of Large Language Models (LLMs) by evaluating their performance on stumpers, unique single-step intuition problems that pose challenges for human solvers but are easily verifiable. We compare the performance of four state-of-the-art LLMs (Davinci-2, Davinci-3, GPT-3.5-Turbo, GPT-4) to human participants. Our findings reveal that the new-generation LLMs excel in solving stumpers and surpass human performance. However, humans exhibit superior skills in verifying solutions to the same problems. This research enhances our understanding of LLMs’ cognitive abilities and provides insights for enhancing their problem-solving potential across various domains.</abstract>
  <identifier type="citekey">goldstein-etal-2023-decoding</identifier>
  <identifier type="doi">10.18653/v1/2023.findings-emnlp.779</identifier>
  <location>
    <url>https://aclanthology.org/2023.findings-emnlp.779</url>
  </location>
  <part>
    <date>2023-12</date>
    <extent unit="page">
      <start>11644</start>
      <end>11653</end>
    </extent>
  </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Decoding Stumpers: Large Language Models vs. Human Problem-Solvers
%A Goldstein, Alon
%A Havin, Miriam
%A Reichart, Roi
%A Goldstein, Ariel
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F goldstein-etal-2023-decoding
%X This paper investigates the problem-solving capabilities of Large Language Models (LLMs) by evaluating their performance on stumpers, unique single-step intuition problems that pose challenges for human solvers but are easily verifiable. We compare the performance of four state-of-the-art LLMs (Davinci-2, Davinci-3, GPT-3.5-Turbo, GPT-4) to human participants. Our findings reveal that the new-generation LLMs excel in solving stumpers and surpass human performance. However, humans exhibit superior skills in verifying solutions to the same problems. This research enhances our understanding of LLMs’ cognitive abilities and provides insights for enhancing their problem-solving potential across various domains.
%R 10.18653/v1/2023.findings-emnlp.779
%U https://aclanthology.org/2023.findings-emnlp.779
%U https://doi.org/10.18653/v1/2023.findings-emnlp.779
%P 11644-11653
Markdown (Informal)
[Decoding Stumpers: Large Language Models vs. Human Problem-Solvers](https://aclanthology.org/2023.findings-emnlp.779) (Goldstein et al., Findings 2023)
ACL
Alon Goldstein, Miriam Havin, Roi Reichart, and Ariel Goldstein. 2023. [Decoding Stumpers: Large Language Models vs. Human Problem-Solvers](https://aclanthology.org/2023.findings-emnlp.779). In *Findings of the Association for Computational Linguistics: EMNLP 2023*, pages 11644–11653, Singapore. Association for Computational Linguistics.