@inproceedings{li-etal-2026-formulareasoning,
title = "{F}ormula{R}easoning: A Dataset for Formula-Based Numerical Reasoning",
author = "Li, Xiao and
Zhu, Bolin and
Shi, Kaiwen and
Liu, Sichen and
Zhu, Yin and
Liu, Yiwei and
Cheng, Gong",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.1580/",
pages = "31579--31601",
ISBN = "979-8-89176-395-1",
abstract = "The application of physics formulas is a fundamental human capability in numerical reasoning. While existing datasets often rely on implicit mathematical knowledge, they rarely explicitate the underlying formulas. To address this, we introduce FormulaReasoning, a new benchmark for formula-based numerical reasoning comprising 5,324 questions requiring calculations grounded in external physics principles. We provide high-quality, fine-grained annotations in English and Chinese{---}including formula structures, parameter names, symbols, values, and units{---}curated through manual effort and LLM-assisted validation. Additionally, we provide a consolidated formula database as an external knowledge source. To further challenge model performance, we develop an extended version of the dataset by coupling multiple questions. We evaluate various architectural and methodological frameworks, including retrieval-augmented methods, modular reasoning (formula generation, parameter extraction, and calculation), and preference-based optimization. Our analysis identifies critical challenges in formula-based reasoning, highlighting significant opportunities for future methodological advancement."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2026-formulareasoning">
<titleInfo>
<title>FormulaReasoning: A Dataset for Formula-Based Numerical Reasoning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bolin</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kaiwen</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sichen</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yin</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yiwei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gong</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>The application of physics formulas is a fundamental human capability in numerical reasoning. While existing datasets often rely on implicit mathematical knowledge, they rarely explicitate the underlying formulas. To address this, we introduce FormulaReasoning, a new benchmark for formula-based numerical reasoning comprising 5,324 questions requiring calculations grounded in external physics principles. We provide high-quality, fine-grained annotations in English and Chinese—including formula structures, parameter names, symbols, values, and units—curated through manual effort and LLM-assisted validation. Additionally, we provide a consolidated formula database as an external knowledge source. To further challenge model performance, we develop an extended version of the dataset by coupling multiple questions. We evaluate various architectural and methodological frameworks, including retrieval-augmented methods, modular reasoning (formula generation, parameter extraction, and calculation), and preference-based optimization. Our analysis identifies critical challenges in formula-based reasoning, highlighting significant opportunities for future methodological advancement.</abstract>
<identifier type="citekey">li-etal-2026-formulareasoning</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.1580/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>31579</start>
<end>31601</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T FormulaReasoning: A Dataset for Formula-Based Numerical Reasoning
%A Li, Xiao
%A Zhu, Bolin
%A Shi, Kaiwen
%A Liu, Sichen
%A Zhu, Yin
%A Liu, Yiwei
%A Cheng, Gong
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F li-etal-2026-formulareasoning
%X The application of physics formulas is a fundamental human capability in numerical reasoning. While existing datasets often rely on implicit mathematical knowledge, they rarely explicitate the underlying formulas. To address this, we introduce FormulaReasoning, a new benchmark for formula-based numerical reasoning comprising 5,324 questions requiring calculations grounded in external physics principles. We provide high-quality, fine-grained annotations in English and Chinese—including formula structures, parameter names, symbols, values, and units—curated through manual effort and LLM-assisted validation. Additionally, we provide a consolidated formula database as an external knowledge source. To further challenge model performance, we develop an extended version of the dataset by coupling multiple questions. We evaluate various architectural and methodological frameworks, including retrieval-augmented methods, modular reasoning (formula generation, parameter extraction, and calculation), and preference-based optimization. Our analysis identifies critical challenges in formula-based reasoning, highlighting significant opportunities for future methodological advancement.
%U https://aclanthology.org/2026.findings-acl.1580/
%P 31579-31601
Markdown (Informal)
[FormulaReasoning: A Dataset for Formula-Based Numerical Reasoning](https://aclanthology.org/2026.findings-acl.1580/) (Li et al., Findings 2026)
ACL
- Xiao Li, Bolin Zhu, Kaiwen Shi, Sichen Liu, Yin Zhu, Yiwei Liu, and Gong Cheng. 2026. FormulaReasoning: A Dataset for Formula-Based Numerical Reasoning. In Findings of the Association for Computational Linguistics: ACL 2026, pages 31579–31601, San Diego, California, United States. Association for Computational Linguistics.