@inproceedings{ariyani-etal-2025-theres,
title = "There{'}s No Such Thing as Simple Reasoning for {LLM}s",
author = "Ariyani, Nurul Fajrin and
Bouraoui, Zied and
Booth, Richard and
Schockaert, Steven",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.232/",
doi = "10.18653/v1/2025.findings-acl.232",
pages = "4503--4514",
ISBN = "979-8-89176-256-5",
abstract = "Large Language Models (LLMs) have been widely found to struggle with logical reasoning, where even fine-tuned models fail dramatically on out-of-distribution problems. However, existing work has focused on relatively complex ``many-hop'' reasoning problems. In this paper, we analyse the performance of fine-tuned LLMs on simple reasoning problems, all of which can be solved in at most three inference steps. Due to the simplicity of these problems, the model cannot encounter test problems that are fundamentally different from those it has seen during training. Unfortunately, however, we find that the models remain highly brittle, being susceptible to seemingly innocent perturbations, such as the addition of duplicates to the set of premises and shuffling the order in which the premises are presented."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ariyani-etal-2025-theres">
    <titleInfo>
      <title>There’s No Such Thing as Simple Reasoning for LLMs</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Nurul</namePart>
      <namePart type="given">Fajrin</namePart>
      <namePart type="family">Ariyani</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zied</namePart>
      <namePart type="family">Bouraoui</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Richard</namePart>
      <namePart type="family">Booth</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Steven</namePart>
      <namePart type="family">Schockaert</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Wanxiang</namePart>
        <namePart type="family">Che</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joyce</namePart>
        <namePart type="family">Nabende</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Shutova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohammad</namePart>
        <namePart type="given">Taher</namePart>
        <namePart type="family">Pilehvar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-256-5</identifier>
    </relatedItem>
    <abstract>Large Language Models (LLMs) have been widely found to struggle with logical reasoning, where even fine-tuned models fail dramatically on out-of-distribution problems. However, existing work has focused on relatively complex “many-hop” reasoning problems. In this paper, we analyse the performance of fine-tuned LLMs on simple reasoning problems, all of which can be solved in at most three inference steps. Due to the simplicity of these problems, the model cannot encounter test problems that are fundamentally different from those it has seen during training. Unfortunately, however, we find that the models remain highly brittle, being susceptible to seemingly innocent perturbations, such as the addition of duplicates to the set of premises and shuffling the order in which the premises are presented.</abstract>
    <identifier type="citekey">ariyani-etal-2025-theres</identifier>
    <identifier type="doi">10.18653/v1/2025.findings-acl.232</identifier>
    <location>
      <url>https://aclanthology.org/2025.findings-acl.232/</url>
    </location>
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>4503</start>
        <end>4514</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T There’s No Such Thing as Simple Reasoning for LLMs
%A Ariyani, Nurul Fajrin
%A Bouraoui, Zied
%A Booth, Richard
%A Schockaert, Steven
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F ariyani-etal-2025-theres
%X Large Language Models (LLMs) have been widely found to struggle with logical reasoning, where even fine-tuned models fail dramatically on out-of-distribution problems. However, existing work has focused on relatively complex “many-hop” reasoning problems. In this paper, we analyse the performance of fine-tuned LLMs on simple reasoning problems, all of which can be solved in at most three inference steps. Due to the simplicity of these problems, the model cannot encounter test problems that are fundamentally different from those it has seen during training. Unfortunately, however, we find that the models remain highly brittle, being susceptible to seemingly innocent perturbations, such as the addition of duplicates to the set of premises and shuffling the order in which the premises are presented.
%R 10.18653/v1/2025.findings-acl.232
%U https://aclanthology.org/2025.findings-acl.232/
%U https://doi.org/10.18653/v1/2025.findings-acl.232
%P 4503-4514

Markdown (Informal)
[There’s No Such Thing as Simple Reasoning for LLMs](https://aclanthology.org/2025.findings-acl.232/) (Ariyani et al., Findings 2025)

ACL
Nurul Fajrin Ariyani, Zied Bouraoui, Richard Booth, and Steven Schockaert. 2025. There’s No Such Thing as Simple Reasoning for LLMs. In Findings of the Association for Computational Linguistics: ACL 2025, pages 4503–4514, Vienna, Austria. Association for Computational Linguistics.
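
The perturbations named in the abstract (adding duplicate premises and shuffling premise order) are simple to state programmatically. The sketch below is a hypothetical illustration of that kind of manipulation, not the authors' evaluation code; the premise strings and the `perturb` helper are invented for this example.

```python
import random

# Hypothetical illustration of the perturbations studied in the paper:
# duplicating a premise and shuffling the presentation order leaves the
# logical content unchanged, yet the paper reports that fine-tuned LLMs
# are brittle under exactly these changes.
premises = [
    "Every bird can fly.",  # invented example premises
    "Tweety is a bird.",
]
conclusion = "Tweety can fly."

def perturb(premises: list[str], seed: int = 0) -> list[str]:
    """Add a duplicate of one premise and shuffle the order."""
    rng = random.Random(seed)
    perturbed = premises + [rng.choice(premises)]  # add a duplicate
    rng.shuffle(perturbed)                         # reorder the premises
    return perturbed

print(perturb(premises))
# An ideal reasoner's verdict on the conclusion should be unaffected,
# since the set of (distinct) premises is the same.
```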