BibTeX

@inproceedings{yun-etal-2025-abstract,
title = "What is an ``Abstract Reasoner''? Revisiting Experiments and Arguments about Large Language Models",
author = "Yun, Tian and
Sun, Chen and
Pavlick, Ellie",
editor = "Boleda, Gemma and
Roth, Michael",
booktitle = "Proceedings of the 29th Conference on Computational Natural Language Learning",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.conll-1.11/",
doi = "10.18653/v1/2025.conll-1.11",
pages = "156--168",
ISBN = "979-8-89176-271-8",
abstract = "Recent work has argued that large language models (LLMs) are not ``abstract reasoners'', citing their poor zero-shot performance on a variety of challenging tasks as evidence. We revisit these experiments in order to add nuance to the claim. First, we show that while LLMs indeed perform poorly in a zero-shot setting, even tuning a small subset of parameters for input encoding can enable near-perfect performance. However, we also show that this finetuning does not necessarily transfer across datasets. We take this collection of empirical results as an invitation to (re-)open the discussion of what it means to be an ``abstract reasoner'', and why it matters whether LLMs fit the bill."
}

MODS XML

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="yun-etal-2025-abstract">
    <titleInfo>
      <title>What is an “Abstract Reasoner”? Revisiting Experiments and Arguments about Large Language Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Tian</namePart>
      <namePart type="family">Yun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chen</namePart>
      <namePart type="family">Sun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ellie</namePart>
      <namePart type="family">Pavlick</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 29th Conference on Computational Natural Language Learning</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Gemma</namePart>
        <namePart type="family">Boleda</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Michael</namePart>
        <namePart type="family">Roth</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-271-8</identifier>
    </relatedItem>
    <abstract>Recent work has argued that large language models (LLMs) are not “abstract reasoners”, citing their poor zero-shot performance on a variety of challenging tasks as evidence. We revisit these experiments in order to add nuance to the claim. First, we show that while LLMs indeed perform poorly in a zero-shot setting, even tuning a small subset of parameters for input encoding can enable near-perfect performance. However, we also show that this finetuning does not necessarily transfer across datasets. We take this collection of empirical results as an invitation to (re-)open the discussion of what it means to be an “abstract reasoner”, and why it matters whether LLMs fit the bill.</abstract>
    <identifier type="citekey">yun-etal-2025-abstract</identifier>
    <identifier type="doi">10.18653/v1/2025.conll-1.11</identifier>
    <location>
      <url>https://aclanthology.org/2025.conll-1.11/</url>
    </location>
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>156</start>
        <end>168</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote

%0 Conference Proceedings
%T What is an “Abstract Reasoner”? Revisiting Experiments and Arguments about Large Language Models
%A Yun, Tian
%A Sun, Chen
%A Pavlick, Ellie
%Y Boleda, Gemma
%Y Roth, Michael
%S Proceedings of the 29th Conference on Computational Natural Language Learning
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-271-8
%F yun-etal-2025-abstract
%X Recent work has argued that large language models (LLMs) are not “abstract reasoners”, citing their poor zero-shot performance on a variety of challenging tasks as evidence. We revisit these experiments in order to add nuance to the claim. First, we show that while LLMs indeed perform poorly in a zero-shot setting, even tuning a small subset of parameters for input encoding can enable near-perfect performance. However, we also show that this finetuning does not necessarily transfer across datasets. We take this collection of empirical results as an invitation to (re-)open the discussion of what it means to be an “abstract reasoner”, and why it matters whether LLMs fit the bill.
%R 10.18653/v1/2025.conll-1.11
%U https://aclanthology.org/2025.conll-1.11/
%U https://doi.org/10.18653/v1/2025.conll-1.11
%P 156-168
Markdown (Informal)
[What is an “Abstract Reasoner”? Revisiting Experiments and Arguments about Large Language Models](https://aclanthology.org/2025.conll-1.11/) (Yun et al., CoNLL 2025)
ACL

Tian Yun, Chen Sun, and Ellie Pavlick. 2025. What is an “Abstract Reasoner”? Revisiting Experiments and Arguments about Large Language Models. In Proceedings of the 29th Conference on Computational Natural Language Learning, pages 156–168, Vienna, Austria. Association for Computational Linguistics.