@inproceedings{gaur-saunshi-2023-reasoning,
title = "Reasoning in Large Language Models Through Symbolic Math Word Problems",
author = "Gaur, Vedant and
Saunshi, Nikunj",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.364",
doi = "10.18653/v1/2023.findings-acl.364",
pages = "5889--5903",
abstract = "Large language models (LLMs) have revolutionized NLP by solving downstream tasks with little to no labeled data. Despite their versatile abilities, the larger question of their ability to reason remains ill-understood. This paper addresses reasoning in math word problems (MWPs) by studying symbolic versions of the numeric problems, since a symbolic expression is a {``}concise explanation{''} of the numeric answer. We create and use a symbolic version of the SVAMP dataset and find that GPT-3{'}s davinci-002 model also has good zero-shot accuracy on symbolic MWPs. To evaluate the faithfulness of the model{'}s reasoning, we go beyond accuracy and additionally evaluate the alignment between the final answer and the outputted reasoning, which correspond to numeric and symbolic answers respectively for MWPs. We explore a self-prompting approach to encourage the symbolic reasoning to align with the numeric answer, thus equipping the LLM with the ability to provide a concise and verifiable reasoning and making it more interpretable. Surprisingly, self-prompting also improves the symbolic accuracy to be higher than both the numeric and symbolic accuracies, thus providing an ensembling effect. The SVAMP-Sym dataset will be released for future research on symbolic math problems.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="gaur-saunshi-2023-reasoning">
    <titleInfo>
      <title>Reasoning in Large Language Models Through Symbolic Math Word Problems</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Vedant</namePart>
      <namePart type="family">Gaur</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nikunj</namePart>
      <namePart type="family">Saunshi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2023</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Rogers</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jordan</namePart>
        <namePart type="family">Boyd-Graber</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Naoaki</namePart>
        <namePart type="family">Okazaki</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Toronto, Canada</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Large language models (LLMs) have revolutionized NLP by solving downstream tasks with little to no labeled data. Despite their versatile abilities, the larger question of their ability to reason remains ill-understood. This paper addresses reasoning in math word problems (MWPs) by studying symbolic versions of the numeric problems, since a symbolic expression is a “concise explanation” of the numeric answer. We create and use a symbolic version of the SVAMP dataset and find that GPT-3’s davinci-002 model also has good zero-shot accuracy on symbolic MWPs. To evaluate the faithfulness of the model’s reasoning, we go beyond accuracy and additionally evaluate the alignment between the final answer and the outputted reasoning, which correspond to numeric and symbolic answers respectively for MWPs. We explore a self-prompting approach to encourage the symbolic reasoning to align with the numeric answer, thus equipping the LLM with the ability to provide a concise and verifiable reasoning and making it more interpretable. Surprisingly, self-prompting also improves the symbolic accuracy to be higher than both the numeric and symbolic accuracies, thus providing an ensembling effect. The SVAMP-Sym dataset will be released for future research on symbolic math problems.</abstract>
    <identifier type="citekey">gaur-saunshi-2023-reasoning</identifier>
    <identifier type="doi">10.18653/v1/2023.findings-acl.364</identifier>
    <location>
      <url>https://aclanthology.org/2023.findings-acl.364</url>
    </location>
    <part>
      <date>2023-07</date>
      <extent unit="page">
        <start>5889</start>
        <end>5903</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Reasoning in Large Language Models Through Symbolic Math Word Problems
%A Gaur, Vedant
%A Saunshi, Nikunj
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F gaur-saunshi-2023-reasoning
%X Large language models (LLMs) have revolutionized NLP by solving downstream tasks with little to no labeled data. Despite their versatile abilities, the larger question of their ability to reason remains ill-understood. This paper addresses reasoning in math word problems (MWPs) by studying symbolic versions of the numeric problems, since a symbolic expression is a “concise explanation” of the numeric answer. We create and use a symbolic version of the SVAMP dataset and find that GPT-3’s davinci-002 model also has good zero-shot accuracy on symbolic MWPs. To evaluate the faithfulness of the model’s reasoning, we go beyond accuracy and additionally evaluate the alignment between the final answer and the outputted reasoning, which correspond to numeric and symbolic answers respectively for MWPs. We explore a self-prompting approach to encourage the symbolic reasoning to align with the numeric answer, thus equipping the LLM with the ability to provide a concise and verifiable reasoning and making it more interpretable. Surprisingly, self-prompting also improves the symbolic accuracy to be higher than both the numeric and symbolic accuracies, thus providing an ensembling effect. The SVAMP-Sym dataset will be released for future research on symbolic math problems.
%R 10.18653/v1/2023.findings-acl.364
%U https://aclanthology.org/2023.findings-acl.364
%U https://doi.org/10.18653/v1/2023.findings-acl.364
%P 5889-5903
Markdown (Informal)
[Reasoning in Large Language Models Through Symbolic Math Word Problems](https://aclanthology.org/2023.findings-acl.364) (Gaur & Saunshi, Findings 2023)

ACL
Vedant Gaur and Nikunj Saunshi. 2023. Reasoning in Large Language Models Through Symbolic Math Word Problems. In Findings of the Association for Computational Linguistics: ACL 2023, pages 5889–5903, Toronto, Canada. Association for Computational Linguistics.
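The alignment evaluation described in the abstract has a simple operational form: the model's symbolic answer counts as a faithful "concise explanation" of its numeric answer exactly when substituting the problem's numbers into the symbolic expression reproduces that answer. Below is a minimal Python sketch of such a check using sympy; the example problem, the variable names w and x, and the model outputs are hypothetical illustrations, not code from the paper or the SVAMP-Sym release.

```python
# Minimal sketch (not the authors' code) of the numeric-symbolic alignment
# check described in the abstract. All problem data below is made up.
import sympy as sp

# Numeric SVAMP-style problem: "Jack had 8 apples and ate 3. How many are left?"
# SVAMP-Sym-style symbolic version: the numbers are replaced by variables.
numbers = {"w": 8, "x": 3}

# Hypothetical model outputs on the numeric and symbolic problem variants.
numeric_answer = 5            # answer returned for the numeric problem
symbolic_answer = "w - x"     # expression returned for the symbolic problem

# Alignment: evaluate the symbolic expression at the problem's numbers and
# compare the result with the numeric answer.
expr = sp.sympify(symbolic_answer)
evaluated = expr.subs({sp.Symbol(k): v for k, v in numbers.items()})
aligned = sp.simplify(evaluated - numeric_answer) == 0
print(f"{symbolic_answer} at {numbers} -> {evaluated}; aligned: {aligned}")
```

Running this check per problem gives the alignment rate over a dataset; per the abstract, the paper's self-prompting approach is what pushes the symbolic output toward agreeing with the numeric answer under this criterion.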