@inproceedings{yang-etal-2024-harnessing,
title = "Harnessing the Power of Large Language Models for Natural Language to First-Order Logic Translation",
author = "Yang, Yuan and
Xiong, Siheng and
Payani, Ali and
Shareghi, Ehsan and
Fekri, Faramarz",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.luhme-long.375/",
doi = "10.18653/v1/2024.acl-long.375",
pages = "6942--6959",
abstract = "Advancements in logical reasoning, utilizing LLMs to convert natural language into logical symbolism, combined with the use of external theorem provers, have repositioned the symbolic approach as a central point of interest. The main challenge within this paradigm lies in the LLMs' capability to accurately translate natural language (NL) statements into first-order-logic (FOL) expressions. Although LLMs have shown notable success, there remains a gap in understanding the limitations and challenges they encounter in NL-FOL translation. This is primarily due to the absence of datasets and evaluation test beds at the required fine-grained level. We present MALLS, a dataset of 28K diverse and verified sentence-level NL-FOL pairs collected from GPT4. We utilize a combined strategy of FOL rule parsing, human annotation, and automatic filtering to ensure quality. We also present LogicLLaMA, a LLaMA2-7B/13B fine-tuned on MALLS for NL-FOL translation, which can be used standalone or to correct previously generated rules by GPT3.5 after being further fine-tuned via a novel reinforcement learning with human feedback (RLHF) framework. We benchmark a wide range of LLMs on MALLS and previous datasets, highlighting weaknesses in them in NL-FOL translation and demonstrating the advantages of MALLS. We also show that LogicLLaMA achieves GPT4-level performance and can generalize to other datasets. Project repo is available at https://github.com/gblackout/LogicLLaMA"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yang-etal-2024-harnessing">
<titleInfo>
<title>Harnessing the Power of Large Language Models for Natural Language to First-Order Logic Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuan</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siheng</namePart>
<namePart type="family">Xiong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Payani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehsan</namePart>
<namePart type="family">Shareghi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Faramarz</namePart>
<namePart type="family">Fekri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Advancements in logical reasoning, utilizing LLMs to convert natural language into logical symbolism, combined with the use of external theorem provers, have repositioned the symbolic approach as a central point of interest. The main challenge within this paradigm lies in the LLMs’ capability to accurately translate natural language (NL) statements into first-order-logic (FOL) expressions. Although LLMs have shown notable success, there remains a gap in understanding the limitations and challenges they encounter in NL-FOL translation. This is primarily due to the absence of datasets and evaluation test beds at the required fine-grained level. We present MALLS, a dataset of 28K diverse and verified sentence-level NL-FOL pairs collected from GPT4. We utilize a combined strategy of FOL rule parsing, human annotation, and automatic filtering to ensure quality. We also present LogicLLaMA, a LLaMA2-7B/13B fine-tuned on MALLS for NL-FOL translation, which can be used standalone or to correct previously generated rules by GPT3.5 after being further fine-tuned via a novel reinforcement learning with human feedback (RLHF) framework. We benchmark a wide range of LLMs on MALLS and previous datasets, highlighting weaknesses in them in NL-FOL translation and demonstrating the advantages of MALLS. We also show that LogicLLaMA achieves GPT4-level performance and can generalize to other datasets. Project repo is available at https://github.com/gblackout/LogicLLaMA</abstract>
<identifier type="citekey">yang-etal-2024-harnessing</identifier>
<identifier type="doi">10.18653/v1/2024.acl-long.375</identifier>
<location>
<url>https://aclanthology.org/2024.luhme-long.375/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>6942</start>
<end>6959</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Harnessing the Power of Large Language Models for Natural Language to First-Order Logic Translation
%A Yang, Yuan
%A Xiong, Siheng
%A Payani, Ali
%A Shareghi, Ehsan
%A Fekri, Faramarz
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F yang-etal-2024-harnessing
%X Advancements in logical reasoning, utilizing LLMs to convert natural language into logical symbolism, combined with the use of external theorem provers, have repositioned the symbolic approach as a central point of interest. The main challenge within this paradigm lies in the LLMs’ capability to accurately translate natural language (NL) statements into first-order-logic (FOL) expressions. Although LLMs have shown notable success, there remains a gap in understanding the limitations and challenges they encounter in NL-FOL translation. This is primarily due to the absence of datasets and evaluation test beds at the required fine-grained level. We present MALLS, a dataset of 28K diverse and verified sentence-level NL-FOL pairs collected from GPT4. We utilize a combined strategy of FOL rule parsing, human annotation, and automatic filtering to ensure quality. We also present LogicLLaMA, a LLaMA2-7B/13B fine-tuned on MALLS for NL-FOL translation, which can be used standalone or to correct previously generated rules by GPT3.5 after being further fine-tuned via a novel reinforcement learning with human feedback (RLHF) framework. We benchmark a wide range of LLMs on MALLS and previous datasets, highlighting weaknesses in them in NL-FOL translation and demonstrating the advantages of MALLS. We also show that LogicLLaMA achieves GPT4-level performance and can generalize to other datasets. Project repo is available at https://github.com/gblackout/LogicLLaMA
%R 10.18653/v1/2024.acl-long.375
%U https://aclanthology.org/2024.luhme-long.375/
%U https://doi.org/10.18653/v1/2024.acl-long.375
%P 6942-6959
Markdown (Informal)
[Harnessing the Power of Large Language Models for Natural Language to First-Order Logic Translation](https://aclanthology.org/2024.luhme-long.375/) (Yang et al., ACL 2024)
ACL