% Conference paper in the Findings of EMNLP 2024 (citation key: an-etal-2024-llms).
% Field values are brace-delimited; `month` uses the standard three-letter macro.
@inproceedings{an-etal-2024-llms,
  author    = {An, Shengnan and Ma, Zexiong and Cai, Siqi and Lin, Zeqi and Zheng, Nanning and Lou, Jian-Guang and Chen, Weizhu},
  title     = {Can {LLM}s Learn From Mistakes? An Empirical Study on Reasoning Tasks},
  editor    = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2024},
  year      = {2024},
  month     = nov,
  pages     = {833--854},
  publisher = {Association for Computational Linguistics},
  address   = {Miami, Florida, USA},
  url       = {https://aclanthology.org/2024.findings-emnlp.46},
  abstract  = {Towards enhancing the chain-of-thought (CoT) reasoning of large language models (LLMs), much existing work has revealed the effectiveness of straightforward learning on annotated/generated CoT paths. However, there is less evidence yet that reasoning capabilities can be enhanced through a reverse learning process, i.e., learning from potential mistakes in reasoning. To investigate whether LLMs can learn from mistakes, we construct mistake-correction datasets, using GPT-4 to identify and correct the mistakes in inaccurate CoTs. With these mistake-correction datasets, we fine-tune open-source LLMs and arrive at the following conclusions. (1) LLMs can indeed learn from mistakes to enhance their CoT reasoning performances. (2) Compared to CoT data, the mistake-correction data provides additional knowledge on the explanations and reasons for the potential mistakes in CoTs, which consistently contributes to the effectiveness of learning from mistakes. (3) Evolution techniques, especially the correction-centric evolution we introduced, can further enhance the effectiveness of learning from mistakes.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, identified by ID "an-etal-2024-llms". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="an-etal-2024-llms">
<!-- Title of the paper itself (the containing volume is described under relatedItem below). -->
<titleInfo>
<title>Can LLMs Learn From Mistakes? An Empirical Study on Reasoning Tasks</title>
</titleInfo>
<!-- Authors: one personal name element per contributor, each with the marcrelator role "author". -->
<name type="personal">
<namePart type="given">Shengnan</namePart>
<namePart type="family">An</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zexiong</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siqi</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeqi</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nanning</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jian-Guang</namePart>
<namePart type="family">Lou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weizhu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the paper, given as year and month. -->
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the volume that contains this paper, with its editors, publisher and place. -->
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Towards enhancing the chain-of-thought (CoT) reasoning of large language models (LLMs), much existing work has revealed the effectiveness of straightforward learning on annotated/generated CoT paths. However, there is less evidence yet that reasoning capabilities can be enhanced through a reverse learning process, i.e., learning from potential mistakes in reasoning. To investigate whether LLMs can learn from mistakes, we construct mistake-correction datasets, using GPT-4 to identify and correct the mistakes in inaccurate CoTs. With these mistake-correction datasets, we fine-tune open-source LLMs and arrive at the following conclusions. (1) LLMs can indeed learn from mistakes to enhance their CoT reasoning performances. (2) Compared to CoT data, the mistake-correction data provides additional knowledge on the explanations and reasons for the potential mistakes in CoTs, which consistently contributes to the effectiveness of learning from mistakes. (3) Evolution techniques, especially the correction-centric evolution we introduced, can further enhance the effectiveness of learning from mistakes.</abstract>
<!-- Citation key for this record; same value as the ID attribute on the mods element. -->
<identifier type="citekey">an-etal-2024-llms</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.46</url>
</location>
<!-- Part details: date plus the first and last page of the paper. -->
<part>
<date>2024-11</date>
<extent unit="page">
<start>833</start>
<end>854</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can LLMs Learn From Mistakes? An Empirical Study on Reasoning Tasks
%A An, Shengnan
%A Ma, Zexiong
%A Cai, Siqi
%A Lin, Zeqi
%A Zheng, Nanning
%A Lou, Jian-Guang
%A Chen, Weizhu
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F an-etal-2024-llms
%X Towards enhancing the chain-of-thought (CoT) reasoning of large language models (LLMs), much existing work has revealed the effectiveness of straightforward learning on annotated/generated CoT paths. However, there is less evidence yet that reasoning capabilities can be enhanced through a reverse learning process, i.e., learning from potential mistakes in reasoning. To investigate whether LLMs can learn from mistakes, we construct mistake-correction datasets, using GPT-4 to identify and correct the mistakes in inaccurate CoTs. With these mistake-correction datasets, we fine-tune open-source LLMs and arrive at the following conclusions. (1) LLMs can indeed learn from mistakes to enhance their CoT reasoning performances. (2) Compared to CoT data, the mistake-correction data provides additional knowledge on the explanations and reasons for the potential mistakes in CoTs, which consistently contributes to the effectiveness of learning from mistakes. (3) Evolution techniques, especially the correction-centric evolution we introduced, can further enhance the effectiveness of learning from mistakes.
%U https://aclanthology.org/2024.findings-emnlp.46
%P 833-854
Markdown (Informal)
[Can LLMs Learn From Mistakes? An Empirical Study on Reasoning Tasks](https://aclanthology.org/2024.findings-emnlp.46) (An et al., Findings 2024)
ACL
- Shengnan An, Zexiong Ma, Siqi Cai, Zeqi Lin, Nanning Zheng, Jian-Guang Lou, and Weizhu Chen. 2024. Can LLMs Learn From Mistakes? An Empirical Study on Reasoning Tasks. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 833–854, Miami, Florida, USA. Association for Computational Linguistics.