@inproceedings{zhang-etal-2024-working,
title = "Working Memory Identifies Reasoning Limits in Language Models",
author = "Zhang, Chunhui and
Jian, Yiren and
Ouyang, Zhongyu and
Vosoughi, Soroush",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.938",
pages = "16896--16922",
abstract = "This study explores the inherent limitations of large language models (LLMs) from a scaling perspective, focusing on the upper bounds of their cognitive capabilities. We integrate insights from cognitive science to quantitatively examine how LLMs perform on n-back tasks{---}a benchmark used to assess working memory, which involves temporarily holding and manipulating information. Our findings reveal that despite the increased model size, LLMs still face significant challenges in holding and processing information effectively, especially under complex task conditions. We also assess various prompting strategies, revealing their diverse impacts on LLM performance. The results highlight the struggle of current LLMs to autonomously discover optimal problem-solving patterns without heavily relying on manually corrected prompts. To move beyond these constraints, fundamental improvements in the planning and search of LLMs are essential for them to reason autonomously. Improving these capabilities will reduce the reliance on external corrections and enable LLMs to become more autonomous in their problem-solving processes.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2024-working">
<titleInfo>
<title>Working Memory Identifies Reasoning Limits in Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chunhui</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yiren</namePart>
<namePart type="family">Jian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhongyu</namePart>
<namePart type="family">Ouyang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soroush</namePart>
<namePart type="family">Vosoughi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This study explores the inherent limitations of large language models (LLMs) from a scaling perspective, focusing on the upper bounds of their cognitive capabilities. We integrate insights from cognitive science to quantitatively examine how LLMs perform on n-back tasks—a benchmark used to assess working memory, which involves temporarily holding and manipulating information. Our findings reveal that despite the increased model size, LLMs still face significant challenges in holding and processing information effectively, especially under complex task conditions. We also assess various prompting strategies, revealing their diverse impacts on LLM performance. The results highlight the struggle of current LLMs to autonomously discover optimal problem-solving patterns without heavily relying on manually corrected prompts. To move beyond these constraints, fundamental improvements in the planning and search of LLMs are essential for them to reason autonomously. Improving these capabilities will reduce the reliance on external corrections and enable LLMs to become more autonomous in their problem-solving processes.</abstract>
<identifier type="citekey">zhang-etal-2024-working</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.938</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>16896</start>
<end>16922</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Working Memory Identifies Reasoning Limits in Language Models
%A Zhang, Chunhui
%A Jian, Yiren
%A Ouyang, Zhongyu
%A Vosoughi, Soroush
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F zhang-etal-2024-working
%X This study explores the inherent limitations of large language models (LLMs) from a scaling perspective, focusing on the upper bounds of their cognitive capabilities. We integrate insights from cognitive science to quantitatively examine how LLMs perform on n-back tasks—a benchmark used to assess working memory, which involves temporarily holding and manipulating information. Our findings reveal that despite the increased model size, LLMs still face significant challenges in holding and processing information effectively, especially under complex task conditions. We also assess various prompting strategies, revealing their diverse impacts on LLM performance. The results highlight the struggle of current LLMs to autonomously discover optimal problem-solving patterns without heavily relying on manually corrected prompts. To move beyond these constraints, fundamental improvements in the planning and search of LLMs are essential for them to reason autonomously. Improving these capabilities will reduce the reliance on external corrections and enable LLMs to become more autonomous in their problem-solving processes.
%U https://aclanthology.org/2024.emnlp-main.938
%P 16896-16922
Markdown (Informal)
[Working Memory Identifies Reasoning Limits in Language Models](https://aclanthology.org/2024.emnlp-main.938) (Zhang et al., EMNLP 2024)
ACL
- Chunhui Zhang, Yiren Jian, Zhongyu Ouyang, and Soroush Vosoughi. 2024. Working Memory Identifies Reasoning Limits in Language Models. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 16896–16922, Miami, Florida, USA. Association for Computational Linguistics.