@inproceedings{pan-etal-2025-lemma,
title = "{LEMMA}: Learning from Errors for {M}athe{M}atical Advancement in {LLM}s",
author = "Pan, Zhuoshi and
Li, Yu and
Lin, Honglin and
Pei, Qizhi and
Tang, Zinan and
Wu, Wei and
Ming, Chenlin and
Zhao, H. Vicky and
He, Conghui and
Wu, Lijun",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.605/",
doi = "10.18653/v1/2025.findings-acl.605",
pages = "11615--11639",
ISBN = "979-8-89176-256-5",
abstract = "Large language models (LLMs) have demonstrated remarkable reasoning capability in solving mathematical problems. However, existing approaches primarily focus on improving the quality of correct training data, e.g., distilling high-quality correct solutions from advanced models, neglecting the value contained in error data, potentially hindering the model{'}s reflective ability. Though some studies attempted to leverage error data, they often involve complex mechanisms, such as Monte Carlo Tree Search (MCTS) to explore error nodes.In this work, we propose to enhance LLM{'}s reasoning ability by Learning from Errors for MatheMatical Advancement (LEMMA). LEMMA constructs data consists of an incorrect solution with an erroneous step and a reflection connection to a correct solution for fine-tuning. Specifically, we systematically analyze the model-generated error types and introduce an {\_}error-type grounded mistake augmentation{\_} method to collect diverse and representative errors. Correct solutions are either from fixing the errors or generating a fresh start. By fine-tuning on the constructed dataset, the model is able to {\_}self-correct errors autonomously{\_} within the generation process {\_}without relying on external critique models{\_}. Experimental results demonstrate that LEMMA achieves significant performance improvements over other strong models with less than $90k$ data."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pan-etal-2025-lemma">
<titleInfo>
<title>LEMMA: Learning from Errors for MatheMatical Advancement in LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhuoshi</namePart>
<namePart type="family">Pan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Honglin</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qizhi</namePart>
<namePart type="family">Pei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zinan</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenlin</namePart>
<namePart type="family">Ming</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">H</namePart>
<namePart type="given">Vicky</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Conghui</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lijun</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Large language models (LLMs) have demonstrated remarkable reasoning capability in solving mathematical problems. However, existing approaches primarily focus on improving the quality of correct training data, e.g., distilling high-quality correct solutions from advanced models, neglecting the value contained in error data, potentially hindering the model’s reflective ability. Though some studies attempted to leverage error data, they often involve complex mechanisms, such as Monte Carlo Tree Search (MCTS) to explore error nodes.In this work, we propose to enhance LLM’s reasoning ability by Learning from Errors for MatheMatical Advancement (LEMMA). LEMMA constructs data consists of an incorrect solution with an erroneous step and a reflection connection to a correct solution for fine-tuning. Specifically, we systematically analyze the model-generated error types and introduce an _error-type grounded mistake augmentation_ method to collect diverse and representative errors. Correct solutions are either from fixing the errors or generating a fresh start. By fine-tuning on the constructed dataset, the model is able to _self-correct errors autonomously_ within the generation process _without relying on external critique models_. Experimental results demonstrate that LEMMA achieves significant performance improvements over other strong models with less than 90k data.</abstract>
<identifier type="citekey">pan-etal-2025-lemma</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.605</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.605/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>11615</start>
<end>11639</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LEMMA: Learning from Errors for MatheMatical Advancement in LLMs
%A Pan, Zhuoshi
%A Li, Yu
%A Lin, Honglin
%A Pei, Qizhi
%A Tang, Zinan
%A Wu, Wei
%A Ming, Chenlin
%A Zhao, H. Vicky
%A He, Conghui
%A Wu, Lijun
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F pan-etal-2025-lemma
%X Large language models (LLMs) have demonstrated remarkable reasoning capability in solving mathematical problems. However, existing approaches primarily focus on improving the quality of correct training data, e.g., distilling high-quality correct solutions from advanced models, neglecting the value contained in error data, potentially hindering the model’s reflective ability. Though some studies attempted to leverage error data, they often involve complex mechanisms, such as Monte Carlo Tree Search (MCTS) to explore error nodes.In this work, we propose to enhance LLM’s reasoning ability by Learning from Errors for MatheMatical Advancement (LEMMA). LEMMA constructs data consists of an incorrect solution with an erroneous step and a reflection connection to a correct solution for fine-tuning. Specifically, we systematically analyze the model-generated error types and introduce an _error-type grounded mistake augmentation_ method to collect diverse and representative errors. Correct solutions are either from fixing the errors or generating a fresh start. By fine-tuning on the constructed dataset, the model is able to _self-correct errors autonomously_ within the generation process _without relying on external critique models_. Experimental results demonstrate that LEMMA achieves significant performance improvements over other strong models with less than 90k data.
%R 10.18653/v1/2025.findings-acl.605
%U https://aclanthology.org/2025.findings-acl.605/
%U https://doi.org/10.18653/v1/2025.findings-acl.605
%P 11615-11639
Markdown (Informal)
[LEMMA: Learning from Errors for MatheMatical Advancement in LLMs](https://aclanthology.org/2025.findings-acl.605/) (Pan et al., Findings 2025)
ACL
- Zhuoshi Pan, Yu Li, Honglin Lin, Qizhi Pei, Zinan Tang, Wei Wu, Chenlin Ming, H. Vicky Zhao, Conghui He, and Lijun Wu. 2025. LEMMA: Learning from Errors for MatheMatical Advancement in LLMs. In Findings of the Association for Computational Linguistics: ACL 2025, pages 11615–11639, Vienna, Austria. Association for Computational Linguistics.