@inproceedings{song-etal-2025-fastcurl,
title = "{F}ast{C}u{RL}: Curriculum Reinforcement Learning with Stage-wise Context Scaling for Efficient Training R1-like Reasoning Models",
author = "Song, Mingyang and
Zheng, Mao and
Li, Zheng and
Yang, Wenjie and
Luo, Xuan",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.470/",
pages = "8856--8866",
ISBN = "979-8-89176-335-7",
abstract = "Improving training efficiency continues to be one of the primary challenges in large-scale Reinforcement Learning (RL). In this paper, we investigate how \textbf{ \textit{context length}} and \textbf{ \textit{the complexity of training data}} influence \textit{the RL scaling training process of R1-distilled reasoning models, e.g., DeepSeek-R1-Distill-Qwen-1.5B}.\textit{Our experimental results reveal that:} text-green\textit{(1) simply controlling the context length and selecting the training data based on the input prompt length can effectively improve the training efficiency of RL scaling, achieving better performance with more concise CoT;} text-blue\textit{(2) properly scaling the context length helps mitigate entropy collapse;} text-red\textit{and (3) carefully choosing the context length facilitates achieving efficient LLM training and reasoning}. Inspired by these insights, we propose \textbf{FastCuRL}, a curriculum RL framework with stage-wise context scaling to achieve efficient LLM training and reasoning. Extensive experimental results demonstrate that \textbf{FastCuRL-1.5B-V3} significantly outperforms state-of-the-art reasoning models on five competition-level benchmarks and achieves 49.6{\%} accuracy on AIME 2024. Furthermore, \textbf{FastCuRL-1.5B-Preview} surpasses DeepScaleR-1.5B-Preview on five benchmarks while only using a single node with 8 GPUs and a total of 50{\%} of training steps."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="song-etal-2025-fastcurl">
<titleInfo>
<title>FastCuRL: Curriculum Reinforcement Learning with Stage-wise Context Scaling for Efficient Training R1-like Reasoning Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mingyang</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mao</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenjie</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuan</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Improving training efficiency continues to be one of the primary challenges in large-scale Reinforcement Learning (RL). In this paper, we investigate how context length and the complexity of training data influence the RL scaling training process of R1-distilled reasoning models, e.g., DeepSeek-R1-Distill-Qwen-1.5B. Our experimental results reveal that: (1) simply controlling the context length and selecting the training data based on the input prompt length can effectively improve the training efficiency of RL scaling, achieving better performance with more concise CoT; (2) properly scaling the context length helps mitigate entropy collapse; and (3) carefully choosing the context length facilitates achieving efficient LLM training and reasoning. Inspired by these insights, we propose FastCuRL, a curriculum RL framework with stage-wise context scaling to achieve efficient LLM training and reasoning. Extensive experimental results demonstrate that FastCuRL-1.5B-V3 significantly outperforms state-of-the-art reasoning models on five competition-level benchmarks and achieves 49.6% accuracy on AIME 2024. Furthermore, FastCuRL-1.5B-Preview surpasses DeepScaleR-1.5B-Preview on five benchmarks while only using a single node with 8 GPUs and a total of 50% of training steps.</abstract>
<identifier type="citekey">song-etal-2025-fastcurl</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.470/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>8856</start>
<end>8866</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T FastCuRL: Curriculum Reinforcement Learning with Stage-wise Context Scaling for Efficient Training R1-like Reasoning Models
%A Song, Mingyang
%A Zheng, Mao
%A Li, Zheng
%A Yang, Wenjie
%A Luo, Xuan
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F song-etal-2025-fastcurl
%X Improving training efficiency continues to be one of the primary challenges in large-scale Reinforcement Learning (RL). In this paper, we investigate how context length and the complexity of training data influence the RL scaling training process of R1-distilled reasoning models, e.g., DeepSeek-R1-Distill-Qwen-1.5B. Our experimental results reveal that: (1) simply controlling the context length and selecting the training data based on the input prompt length can effectively improve the training efficiency of RL scaling, achieving better performance with more concise CoT; (2) properly scaling the context length helps mitigate entropy collapse; and (3) carefully choosing the context length facilitates achieving efficient LLM training and reasoning. Inspired by these insights, we propose FastCuRL, a curriculum RL framework with stage-wise context scaling to achieve efficient LLM training and reasoning. Extensive experimental results demonstrate that FastCuRL-1.5B-V3 significantly outperforms state-of-the-art reasoning models on five competition-level benchmarks and achieves 49.6% accuracy on AIME 2024. Furthermore, FastCuRL-1.5B-Preview surpasses DeepScaleR-1.5B-Preview on five benchmarks while only using a single node with 8 GPUs and a total of 50% of training steps.
%U https://aclanthology.org/2025.findings-emnlp.470/
%P 8856-8866
Markdown (Informal)
[FastCuRL: Curriculum Reinforcement Learning with Stage-wise Context Scaling for Efficient Training R1-like Reasoning Models](https://aclanthology.org/2025.findings-emnlp.470/) (Song et al., Findings 2025)
ACL
Mingyang Song, Mao Zheng, Zheng Li, Wenjie Yang, and Xuan Luo. 2025. FastCuRL: Curriculum Reinforcement Learning with Stage-wise Context Scaling for Efficient Training R1-like Reasoning Models. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 8856–8866, Suzhou, China. Association for Computational Linguistics.