@inproceedings{wang-etal-2025-teaching,
title = "Teaching {LLM}s to Plan, Not Just Solve: Plan Learning Boosts {LLM}s Generalization in Reasoning Tasks",
author = "Wang, Tianlong and
Chen, Junzhe and
Liao, Weibin and
Han, Xueting and
Bai, Jing",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.453/",
pages = "8531--8545",
ISBN = "979-8-89176-335-7",
abstract = "Reinforcement learning (RL) on self-generated data has emerged as a promising paradigm for improving reasoning in large language models (LLMs). However, RL relies on accurate reward signals, which are scarce in many domains, making it critical to train models that can generalize to unseen problems. Existing methods often focus on task-specific or domain-specific reasoning, lacking consideration for generalization and may degrade performance on other tasks. To address this, we distinguish between abstract plans, representing high-level problem-solving strategies, and concrete solutions, proposing that learning plans develops transferable general reasoning capabilities and promotes better generalization. Building on this insight, we propose PlanLearn, a framework that combines plan-based search with Step-level Advantage Preference Optimization (Step-APO) to optimize plan learning. Experimental results show that PlanLearn, trained exclusively on GSM8K and MATH, not only significantly improves in-domain performance but also enhances out-of-domain benchmarks, such as HumanEval (+12.2{\%}), GPQA (+8.6{\%}), ARC-C (+4.0{\%}), MMLU-STEM (+2.2{\%}), and BBH (+1.8{\%}). The code is available at https://github.com/tianlwang/PlanLearn."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2025-teaching">
<titleInfo>
<title>Teaching LLMs to Plan, Not Just Solve: Plan Learning Boosts LLMs Generalization in Reasoning Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tianlong</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junzhe</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weibin</namePart>
<namePart type="family">Liao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xueting</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Bai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Reinforcement learning (RL) on self-generated data has emerged as a promising paradigm for improving reasoning in large language models (LLMs). However, RL relies on accurate reward signals, which are scarce in many domains, making it critical to train models that can generalize to unseen problems. Existing methods often focus on task-specific or domain-specific reasoning, lacking consideration for generalization and may degrade performance on other tasks. To address this, we distinguish between abstract plans, representing high-level problem-solving strategies, and concrete solutions, proposing that learning plans develops transferable general reasoning capabilities and promotes better generalization. Building on this insight, we propose PlanLearn, a framework that combines plan-based search with Step-level Advantage Preference Optimization (Step-APO) to optimize plan learning. Experimental results show that PlanLearn, trained exclusively on GSM8K and MATH, not only significantly improves in-domain performance but also enhances out-of-domain benchmarks, such as HumanEval (+12.2%), GPQA (+8.6%), ARC-C (+4.0%), MMLU-STEM (+2.2%), and BBH (+1.8%). The code is available at https://github.com/tianlwang/PlanLearn.</abstract>
<identifier type="citekey">wang-etal-2025-teaching</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.453/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>8531</start>
<end>8545</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Teaching LLMs to Plan, Not Just Solve: Plan Learning Boosts LLMs Generalization in Reasoning Tasks
%A Wang, Tianlong
%A Chen, Junzhe
%A Liao, Weibin
%A Han, Xueting
%A Bai, Jing
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F wang-etal-2025-teaching
%X Reinforcement learning (RL) on self-generated data has emerged as a promising paradigm for improving reasoning in large language models (LLMs). However, RL relies on accurate reward signals, which are scarce in many domains, making it critical to train models that can generalize to unseen problems. Existing methods often focus on task-specific or domain-specific reasoning, lacking consideration for generalization and may degrade performance on other tasks. To address this, we distinguish between abstract plans, representing high-level problem-solving strategies, and concrete solutions, proposing that learning plans develops transferable general reasoning capabilities and promotes better generalization. Building on this insight, we propose PlanLearn, a framework that combines plan-based search with Step-level Advantage Preference Optimization (Step-APO) to optimize plan learning. Experimental results show that PlanLearn, trained exclusively on GSM8K and MATH, not only significantly improves in-domain performance but also enhances out-of-domain benchmarks, such as HumanEval (+12.2%), GPQA (+8.6%), ARC-C (+4.0%), MMLU-STEM (+2.2%), and BBH (+1.8%). The code is available at https://github.com/tianlwang/PlanLearn.
%U https://aclanthology.org/2025.findings-emnlp.453/
%P 8531-8545
Markdown (Informal)
[Teaching LLMs to Plan, Not Just Solve: Plan Learning Boosts LLMs Generalization in Reasoning Tasks](https://aclanthology.org/2025.findings-emnlp.453/) (Wang et al., Findings 2025)
ACL