@inproceedings{fernando-lopez-ponce-bel-enguix-2025-limits,
title = "Into The Limits of Logic: Alignment Methods for Formal Logical Reasoning",
author = "Fernando Lopez-Ponce, Francisco and
Bel-Enguix, Gemma",
editor = "Valentino, Marco and
Ferreira, Deborah and
Thayaparan, Mokanarangan and
Ranaldi, Leonardo and
Freitas, Andre",
booktitle = "Proceedings of The 3rd Workshop on Mathematical Natural Language Processing (MathNLP 2025)",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.mathnlp-main.8/",
pages = "112--123",
ISBN = "979-8-89176-348-7",
abstract = "We implement Large Language Model Alignment algorithms to formal logic reasoning tasks involving natural-language (NL) to first-order logic (FOL) translation, formal logic inference, and premise retranslation. These methodologies were implemented using task-specific preference datasets created based on the FOLIO datasets and LLM generations. Alignment was based on DPO, this algorithm was implemented and tested on off-the-shelf and pre-aligned models, showing promising results for higher quality NL-FOL parsing, as well as general alignment strategies. In addition, we introduce a new similarity metric ($LogicSim$) between LLM-generated responses and gold standard values, that measures logic-relevant information such as premise count and overlap between answers and expands evaluation of NL-FOL translation pipelines. Our results show that LLMs still struggle with logical inference, however alignment benefits semantic parsing and retranslation of results from formal logic to natural language."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fernando-lopez-ponce-bel-enguix-2025-limits">
<titleInfo>
<title>Into The Limits of Logic: Alignment Methods for Formal Logical Reasoning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Francisco</namePart>
<namePart type="family">Fernando Lopez-Ponce</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gemma</namePart>
<namePart type="family">Bel-Enguix</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of The 3rd Workshop on Mathematical Natural Language Processing (MathNLP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Valentino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deborah</namePart>
<namePart type="family">Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mokanarangan</namePart>
<namePart type="family">Thayaparan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leonardo</namePart>
<namePart type="family">Ranaldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-348-7</identifier>
</relatedItem>
<abstract>We implement Large Language Model Alignment algorithms to formal logic reasoning tasks involving natural-language (NL) to first-order logic (FOL) translation, formal logic inference, and premise retranslation. These methodologies were implemented using task-specific preference datasets created based on the FOLIO datasets and LLM generations. Alignment was based on DPO, this algorithm was implemented and tested on off-the-shelf and pre-aligned models, showing promising results for higher quality NL-FOL parsing, as well as general alignment strategies. In addition, we introduce a new similarity metric (LogicSim) between LLM-generated responses and gold standard values, that measures logic-relevant information such as premise count and overlap between answers and expands evaluation of NL-FOL translation pipelines. Our results show that LLMs still struggle with logical inference, however alignment benefits semantic parsing and retranslation of results from formal logic to natural language.</abstract>
<identifier type="citekey">fernando-lopez-ponce-bel-enguix-2025-limits</identifier>
<location>
<url>https://aclanthology.org/2025.mathnlp-main.8/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>112</start>
<end>123</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Into The Limits of Logic: Alignment Methods for Formal Logical Reasoning
%A Fernando Lopez-Ponce, Francisco
%A Bel-Enguix, Gemma
%Y Valentino, Marco
%Y Ferreira, Deborah
%Y Thayaparan, Mokanarangan
%Y Ranaldi, Leonardo
%Y Freitas, Andre
%S Proceedings of The 3rd Workshop on Mathematical Natural Language Processing (MathNLP 2025)
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-348-7
%F fernando-lopez-ponce-bel-enguix-2025-limits
%X We apply Large Language Model alignment algorithms to formal logic reasoning tasks involving natural-language (NL) to first-order logic (FOL) translation, formal logic inference, and premise retranslation. These methodologies were implemented using task-specific preference datasets created from the FOLIO datasets and LLM generations. Alignment was based on DPO; this algorithm was implemented and tested on off-the-shelf and pre-aligned models, showing promising results for higher-quality NL-FOL parsing as well as general alignment strategies. In addition, we introduce a new similarity metric (LogicSim) between LLM-generated responses and gold-standard values that measures logic-relevant information, such as premise count and overlap between answers, and expands evaluation of NL-FOL translation pipelines. Our results show that LLMs still struggle with logical inference; however, alignment benefits semantic parsing and retranslation of results from formal logic to natural language.
%U https://aclanthology.org/2025.mathnlp-main.8/
%P 112-123
Markdown (Informal)
[Into The Limits of Logic: Alignment Methods for Formal Logical Reasoning](https://aclanthology.org/2025.mathnlp-main.8/) (Fernando Lopez-Ponce & Bel-Enguix, MathNLP 2025)
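
The abstract describes LogicSim only at a high level: a similarity score between LLM-generated FOL output and the gold standard that accounts for premise count and overlap between answers. The exact definition lives in the paper itself and is not reproduced on this page. The sketch below is a hypothetical illustration of how such a metric could combine those two signals; the function name, Jaccard overlap, and equal weighting are assumptions for illustration, not the authors' implementation.

```python
# Hypothetical sketch of a LogicSim-style similarity score.
# NOT the authors' metric (see https://aclanthology.org/2025.mathnlp-main.8/);
# it only illustrates the two signals named in the abstract:
# premise count and overlap between generated and gold answers.
import re


def _symbols(premise: str) -> set[str]:
    """Extract predicate/constant/variable tokens from a FOL premise string."""
    return set(re.findall(r"[A-Za-z_][A-Za-z0-9_]*", premise))


def logic_sim_sketch(generated: list[str], gold: list[str],
                     w_count: float = 0.5, w_overlap: float = 0.5) -> float:
    """Combine premise-count agreement with average per-premise symbol overlap."""
    if not generated or not gold:
        return 0.0
    # Premise-count agreement: 1.0 when the counts match, decaying with the gap.
    count_score = min(len(generated), len(gold)) / max(len(generated), len(gold))
    # Greedy overlap: match each gold premise to its best generated premise
    # by Jaccard similarity over extracted symbols.
    overlaps = []
    for g in gold:
        gs = _symbols(g)
        best = max((len(gs & _symbols(p)) / len(gs | _symbols(p))
                    for p in generated if gs | _symbols(p)), default=0.0)
        overlaps.append(best)
    overlap_score = sum(overlaps) / len(overlaps)
    return w_count * count_score + w_overlap * overlap_score


# Example: one matching premise, one mismatched predicate.
gold = ["∀x (Dog(x) → Animal(x))", "Dog(rex)"]
generated = ["∀x (Dog(x) → Animal(x))", "Cat(rex)"]
print(round(logic_sim_sketch(generated, gold), 3))
```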