@article{pei-etal-2025-fover,
title = "{F}o{V}er: First-Order Logic Verification for Natural Language Reasoning",
author = "Pei, Yu and
Du, Yongping and
Jin, Xingnan",
journal = "Transactions of the Association for Computational Linguistics",
volume = "13",
year = "2025",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2025.tacl-1.61/",
doi = "10.1162/tacl.a.41",
pages = "1340--1359",
abstract = "Large Language Models (LLMs) have shown remarkable capabilities in various tasks, including logical reasoning. However, their propensity for generating incorrect or inconsistent responses remains a significant concern. To address this issue, we propose FoVer (First-order logic Verification), an automated pipeline that verifies the logical correctness of reasoning texts using first-order logic. The pipeline operates in two main steps: (1) LLM-driven translation of natural language into executable logical expressions, and (2) automated logical verification using the Z3 theorem prover. We evaluate FoVer on specialized logical datasets (ProofWriter and FOLIO) and real-world LLM outputs (REVEAL). The results demonstrate that FoVer outperforms existing methods in logical verification significantly, showing notable improvements in accuracy across both ideal and practical scenarios. The pipeline also demonstrates its potential in identifying annotation errors in existing datasets, and it could be utilized for constructing new logical reasoning datasets. This work presents a significant step forward in enhancing the trustworthiness of LLM outputs, particularly in tasks requiring logical integrity."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pei-etal-2025-fover">
<titleInfo>
<title>FoVer: First-Order Logic Verification for Natural Language Reasoning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Pei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yongping</namePart>
<namePart type="family">Du</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xingnan</namePart>
<namePart type="family">Jin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Large Language Models (LLMs) have shown remarkable capabilities in various tasks, including logical reasoning. However, their propensity for generating incorrect or inconsistent responses remains a significant concern. To address this issue, we propose FoVer (First-order logic Verification), an automated pipeline that verifies the logical correctness of reasoning texts using first-order logic. The pipeline operates in two main steps: (1) LLM-driven translation of natural language into executable logical expressions, and (2) automated logical verification using the Z3 theorem prover. We evaluate FoVer on specialized logical datasets (ProofWriter and FOLIO) and real-world LLM outputs (REVEAL). The results demonstrate that FoVer outperforms existing methods in logical verification significantly, showing notable improvements in accuracy across both ideal and practical scenarios. The pipeline also demonstrates its potential in identifying annotation errors in existing datasets, and it could be utilized for constructing new logical reasoning datasets. This work presents a significant step forward in enhancing the trustworthiness of LLM outputs, particularly in tasks requiring logical integrity.</abstract>
<identifier type="citekey">pei-etal-2025-fover</identifier>
<identifier type="doi">10.1162/tacl.a.41</identifier>
<location>
<url>https://aclanthology.org/2025.tacl-1.61/</url>
</location>
<part>
<date>2025</date>
<detail type="volume"><number>13</number></detail>
<extent unit="page">
<start>1340</start>
<end>1359</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T FoVer: First-Order Logic Verification for Natural Language Reasoning
%A Pei, Yu
%A Du, Yongping
%A Jin, Xingnan
%J Transactions of the Association for Computational Linguistics
%D 2025
%V 13
%I MIT Press
%C Cambridge, MA
%F pei-etal-2025-fover
%X Large Language Models (LLMs) have shown remarkable capabilities in various tasks, including logical reasoning. However, their propensity for generating incorrect or inconsistent responses remains a significant concern. To address this issue, we propose FoVer (First-order logic Verification), an automated pipeline that verifies the logical correctness of reasoning texts using first-order logic. The pipeline operates in two main steps: (1) LLM-driven translation of natural language into executable logical expressions, and (2) automated logical verification using the Z3 theorem prover. We evaluate FoVer on specialized logical datasets (ProofWriter and FOLIO) and real-world LLM outputs (REVEAL). The results demonstrate that FoVer outperforms existing methods in logical verification significantly, showing notable improvements in accuracy across both ideal and practical scenarios. The pipeline also demonstrates its potential in identifying annotation errors in existing datasets, and it could be utilized for constructing new logical reasoning datasets. This work presents a significant step forward in enhancing the trustworthiness of LLM outputs, particularly in tasks requiring logical integrity.
%R 10.1162/tacl.a.41
%U https://aclanthology.org/2025.tacl-1.61/
%U https://doi.org/10.1162/tacl.a.41
%P 1340-1359
Markdown (Informal)
[FoVer: First-Order Logic Verification for Natural Language Reasoning](https://aclanthology.org/2025.tacl-1.61/) (Pei et al., TACL 2025)
ACL