@inproceedings{dougrez-lewis-etal-2025-assessing,
title = "Assessing the Reasoning Capabilities of {LLM}s in the context of Evidence-based Claim Verification",
author = {Dougrez-Lewis, John and
Akhter, Mahmud Elahi and
Ruggeri, Federico and
L{\"o}bbers, Sebastian and
He, Yulan and
Liakata, Maria},
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.1059/",
doi = "10.18653/v1/2025.findings-acl.1059",
pages = "20604--20628",
ISBN = "979-8-89176-256-5",
abstract = "Although LLMs have shown great performance on Mathematics and Coding related reasoning tasks, the reasoning capabilities of LLMs regarding other forms of reasoning are still an open problem. Here, we examine the issue of reasoning from the perspective of claim verification. We propose a framework designed to break down any claim paired with evidence into atomic reasoning types that are necessary for verification. We use this framework to create RECV, the first claim verification benchmark, incorporating real-world claims, to assess the deductive and abductive reasoning capabilities of LLMs. The benchmark comprises of three datasets, covering reasoning problems of in creasing complexity. We evaluate three state of-the-art proprietary LLMs under multiple prompt settings. Our results show that while LLMs can address deductive reasoning prob lems, they consistently fail in cases of abductive reasoning. Moreover, we observe that enhancing LLMs with rationale generation is not always beneficial. Nonetheless, we find that generated rationales are semantically similar to those provided by humans, especially in deduc tive reasoning cases."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dougrez-lewis-etal-2025-assessing">
<titleInfo>
<title>Assessing the Reasoning Capabilities of LLMs in the context of Evidence-based Claim Verification</title>
</titleInfo>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Dougrez-Lewis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mahmud</namePart>
<namePart type="given">Elahi</namePart>
<namePart type="family">Akhter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Federico</namePart>
<namePart type="family">Ruggeri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Löbbers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
    <abstract>Although LLMs have shown great performance on Mathematics and Coding related reasoning tasks, the reasoning capabilities of LLMs regarding other forms of reasoning are still an open problem. Here, we examine the issue of reasoning from the perspective of claim verification. We propose a framework designed to break down any claim paired with evidence into atomic reasoning types that are necessary for verification. We use this framework to create RECV, the first claim verification benchmark, incorporating real-world claims, to assess the deductive and abductive reasoning capabilities of LLMs. The benchmark comprises three datasets, covering reasoning problems of increasing complexity. We evaluate three state-of-the-art proprietary LLMs under multiple prompt settings. Our results show that while LLMs can address deductive reasoning problems, they consistently fail in cases of abductive reasoning. Moreover, we observe that enhancing LLMs with rationale generation is not always beneficial. Nonetheless, we find that generated rationales are semantically similar to those provided by humans, especially in deductive reasoning cases.</abstract>
<identifier type="citekey">dougrez-lewis-etal-2025-assessing</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.1059</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.1059/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>20604</start>
<end>20628</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Assessing the Reasoning Capabilities of LLMs in the context of Evidence-based Claim Verification
%A Dougrez-Lewis, John
%A Akhter, Mahmud Elahi
%A Ruggeri, Federico
%A Löbbers, Sebastian
%A He, Yulan
%A Liakata, Maria
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F dougrez-lewis-etal-2025-assessing
%X Although LLMs have shown great performance on Mathematics and Coding related reasoning tasks, the reasoning capabilities of LLMs regarding other forms of reasoning are still an open problem. Here, we examine the issue of reasoning from the perspective of claim verification. We propose a framework designed to break down any claim paired with evidence into atomic reasoning types that are necessary for verification. We use this framework to create RECV, the first claim verification benchmark, incorporating real-world claims, to assess the deductive and abductive reasoning capabilities of LLMs. The benchmark comprises three datasets, covering reasoning problems of increasing complexity. We evaluate three state-of-the-art proprietary LLMs under multiple prompt settings. Our results show that while LLMs can address deductive reasoning problems, they consistently fail in cases of abductive reasoning. Moreover, we observe that enhancing LLMs with rationale generation is not always beneficial. Nonetheless, we find that generated rationales are semantically similar to those provided by humans, especially in deductive reasoning cases.
%R 10.18653/v1/2025.findings-acl.1059
%U https://aclanthology.org/2025.findings-acl.1059/
%U https://doi.org/10.18653/v1/2025.findings-acl.1059
%P 20604-20628
Markdown (Informal)
[Assessing the Reasoning Capabilities of LLMs in the context of Evidence-based Claim Verification](https://aclanthology.org/2025.findings-acl.1059/) (Dougrez-Lewis et al., Findings 2025)