@inproceedings{li-etal-2025-language-models,
title = "Language Models Can Easily Learn to Reason from Demonstrations",
author = "Li, Dacheng and
Cao, Shiyi and
Griggs, Tyler and
Liu, Shu and
Mo, Xiangxi and
Tang, Eric and
Hegde, Sumanth and
Hakhamaneshi, Kourosh and
Patil, Shishir G and
Zaharia, Matei and
Gonzalez, Joseph E. and
Stoica, Ion",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.866/",
pages = "15979--15997",
ISBN = "979-8-89176-335-7",
abstract = "Large reasoning models (LRMs) tackle complex problems by following long chain-of-thoughts (Long CoT) that incorporate reflection, backtracking, and self-validation. However, the training techniques and data requirements to elicit Long CoT remain poorly understood. In this work, we find that language models can effectively learn Long CoT reasoning through data-efficient supervised fine-tuning (SFT) and further parameter-efficient low-rank adaptation (LoRA). Crucially, we find that the structure of Long CoT is critical to the learning process in this data-efficient fine-tuning process. Training on content-incorrect examples, e.g. those lead to incorrect answers or corrupted digits, still leads to significant performance gains. In contrast, training on structurally incorrect examples, e.g., with shuffled or deleted reasoning steps, yield smaller improvements or even degrade performance."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2025-language-models">
<titleInfo>
<title>Language Models Can Easily Learn to Reason from Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dacheng</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiyi</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tyler</namePart>
<namePart type="family">Griggs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shu</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiangxi</namePart>
<namePart type="family">Mo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sumanth</namePart>
<namePart type="family">Hegde</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kourosh</namePart>
<namePart type="family">Hakhamaneshi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shishir</namePart>
<namePart type="given">G</namePart>
<namePart type="family">Patil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matei</namePart>
<namePart type="family">Zaharia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Gonzalez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ion</namePart>
<namePart type="family">Stoica</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Large reasoning models (LRMs) tackle complex problems by following long chain-of-thoughts (Long CoT) that incorporate reflection, backtracking, and self-validation. However, the training techniques and data requirements to elicit Long CoT remain poorly understood. In this work, we find that language models can effectively learn Long CoT reasoning through data-efficient supervised fine-tuning (SFT) and further parameter-efficient low-rank adaptation (LoRA). Crucially, we find that the structure of Long CoT is critical to the learning process in this data-efficient fine-tuning process. Training on content-incorrect examples, e.g. those lead to incorrect answers or corrupted digits, still leads to significant performance gains. In contrast, training on structurally incorrect examples, e.g., with shuffled or deleted reasoning steps, yield smaller improvements or even degrade performance.</abstract>
<identifier type="citekey">li-etal-2025-language-models</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.866/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>15979</start>
<end>15997</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Language Models Can Easily Learn to Reason from Demonstrations
%A Li, Dacheng
%A Cao, Shiyi
%A Griggs, Tyler
%A Liu, Shu
%A Mo, Xiangxi
%A Tang, Eric
%A Hegde, Sumanth
%A Hakhamaneshi, Kourosh
%A Patil, Shishir G.
%A Zaharia, Matei
%A Gonzalez, Joseph E.
%A Stoica, Ion
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F li-etal-2025-language-models
%X Large reasoning models (LRMs) tackle complex problems by following long chain-of-thoughts (Long CoT) that incorporate reflection, backtracking, and self-validation. However, the training techniques and data requirements to elicit Long CoT remain poorly understood. In this work, we find that language models can effectively learn Long CoT reasoning through data-efficient supervised fine-tuning (SFT) and further parameter-efficient low-rank adaptation (LoRA). Crucially, we find that the structure of Long CoT is critical to the learning process in this data-efficient fine-tuning process. Training on content-incorrect examples, e.g. those lead to incorrect answers or corrupted digits, still leads to significant performance gains. In contrast, training on structurally incorrect examples, e.g., with shuffled or deleted reasoning steps, yield smaller improvements or even degrade performance.
%U https://aclanthology.org/2025.findings-emnlp.866/
%P 15979-15997
Markdown (Informal)
[Language Models Can Easily Learn to Reason from Demonstrations](https://aclanthology.org/2025.findings-emnlp.866/) (Li et al., Findings 2025)
ACL
- Dacheng Li, Shiyi Cao, Tyler Griggs, Shu Liu, Xiangxi Mo, Eric Tang, Sumanth Hegde, Kourosh Hakhamaneshi, Shishir G Patil, Matei Zaharia, Joseph E. Gonzalez, and Ion Stoica. 2025. Language Models Can Easily Learn to Reason from Demonstrations. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 15979–15997, Suzhou, China. Association for Computational Linguistics.