@inproceedings{nakamura-etal-2026-demystifying,
title = "Demystifying Mixed Outcomes of Self-Training: Pre-training Analyses on Non-Toy {LLM}s",
author = "Nakamura, Yusuke and
Kiyomaru, Hirokazu and
Liu, Chaoran and
Kurita, Shuhei and
Kawahara, Daisuke",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {EACL} 2026",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-eacl.213/",
pages = "4107--4113",
ISBN = "979-8-89176-386-9",
abstract = "We investigate whether large language models (LLMs) can improve through recursive training on self-generated text, a topic where prior studies report conflicting outcomes: some find evidence of performance gains (i.e., \textit{self-improvement}), while others observe performance degradation (i.e., \textit{model collapse}). To clarify this discrepancy, we use the OLMo-2 models as non-toy LLMs and perform multiple rounds of continual pre-training using self-generated text with different prompting strategies and data filtering. Our experiments show that naive recursive self-training does not improve either perplexity or downstream task performance, regardless of model size. These results suggest that model collapse observed in naive recursive training is inherent to the training procedure itself, while self-improvement likely owes its success not to the model{'}s autonomous refinement but to human-designed, strategic synthetic pipelines that inject external intelligence."
}
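The abstract describes the studied procedure only at a high level: sample text from the model, optionally filter it, continually pre-train on it, and repeat. Below is a minimal sketch of that naive recursive self-training loop, not the paper's actual setup: it uses GPT-2 as a stand-in for the OLMo-2 checkpoints, and the sampling settings, the perplexity-based filter, the optimizer, and all hyperparameters are illustrative assumptions.

```python
# Minimal sketch of naive recursive self-training (generate -> filter -> train).
# GPT-2 stands in for OLMo-2; every setting here is an illustrative assumption.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "gpt2"  # stand-in; the paper continually pre-trains OLMo-2 models
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

def generate_corpus(num_samples: int, max_new_tokens: int = 128) -> list[str]:
    """Sample self-generated text from the current model (open-ended prompting)."""
    model.eval()
    texts = []
    with torch.no_grad():
        for _ in range(num_samples):
            inputs = tokenizer(tokenizer.bos_token, return_tensors="pt")
            out = model.generate(**inputs, do_sample=True, top_p=0.95,
                                 max_new_tokens=max_new_tokens,
                                 pad_token_id=tokenizer.eos_token_id)
            texts.append(tokenizer.decode(out[0], skip_special_tokens=True))
    return texts

def perplexity_filter(texts: list[str], threshold: float = 100.0) -> list[str]:
    """One hypothetical data filter: keep samples the model itself scores as fluent."""
    kept = []
    model.eval()
    with torch.no_grad():
        for t in texts:
            ids = tokenizer(t, return_tensors="pt").input_ids
            if ids.size(1) < 2:
                continue  # too short to compute a language-modeling loss
            loss = model(ids, labels=ids).loss
            if torch.exp(loss).item() < threshold:
                kept.append(t)
    return kept

def continual_pretrain(texts: list[str], epochs: int = 1) -> None:
    """One round of continual pre-training on the (filtered) self-generated text."""
    model.train()
    for _ in range(epochs):
        for t in texts:
            ids = tokenizer(t, return_tensors="pt", truncation=True,
                            max_length=512).input_ids
            loss = model(ids, labels=ids).loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

for round_idx in range(3):  # "multiple rounds" of recursive self-training
    corpus = generate_corpus(num_samples=64)
    corpus = perplexity_filter(corpus)  # optional filtering step
    continual_pretrain(corpus)          # the model now trains on its own output
```

The paper's finding is that this loop, run naively, improves neither perplexity nor downstream performance at any model size; the sketch only makes concrete what "recursive training on self-generated text" means procedurally.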