@inproceedings{nosaka-matsuzaki-2025-timestep,
title = "Timestep Embeddings Trigger Collapse in Diffusion Text Generation",
author = "Nosaka, Ryota and
Matsuzaki, Takuya",
editor = "Boleda, Gemma and
Roth, Michael",
booktitle = "Proceedings of the 29th Conference on Computational Natural Language Learning",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.conll-1.26/",
doi = "10.18653/v1/2025.conll-1.26",
pages = "397--406",
ISBN = "979-8-89176-271-8",
abstract = "Diffusion models have achieved remarkable success in various generative tasks, particularly in image and audio synthesis, which work by iteratively refining random noise into realistic data. Recent studies have highlighted the potential of diffusion models for text generation, but several challenges remain unresolved. One significant issue is that the model begins to degrade a previous sample rather than improve it after a certain timestep in the generation process, resulting in broken text. In this paper, we reveal that timestep embeddings are a principal cause of the collapse problem by analyzing their interactions with word embeddings. Further, we propose two key methods: (a) a simple lightweight word embedding technique that enhances model analyzability as well as learning efficiency; (b) a novel regularization on both word and timestep embeddings. Experimental results demonstrate that our approach effectively mitigates the collapse problem and can lead to a considerable improvement in the quality of generated text."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="nosaka-matsuzaki-2025-timestep">
    <titleInfo>
      <title>Timestep Embeddings Trigger Collapse in Diffusion Text Generation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Ryota</namePart>
      <namePart type="family">Nosaka</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Takuya</namePart>
      <namePart type="family">Matsuzaki</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 29th Conference on Computational Natural Language Learning</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Gemma</namePart>
        <namePart type="family">Boleda</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Michael</namePart>
        <namePart type="family">Roth</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-271-8</identifier>
    </relatedItem>
    <abstract>Diffusion models, which work by iteratively refining random noise into realistic data, have achieved remarkable success in various generative tasks, particularly in image and audio synthesis. Recent studies have highlighted the potential of diffusion models for text generation, but several challenges remain unresolved. One significant issue is that the model begins to degrade a previous sample rather than improve it after a certain timestep in the generation process, resulting in broken text. In this paper, we reveal that timestep embeddings are a principal cause of the collapse problem by analyzing their interactions with word embeddings. Further, we propose two key methods: (a) a simple lightweight word embedding technique that enhances model analyzability as well as learning efficiency; (b) a novel regularization on both word and timestep embeddings. Experimental results demonstrate that our approach effectively mitigates the collapse problem and can lead to a considerable improvement in the quality of generated text.</abstract>
<identifier type="citekey">nosaka-matsuzaki-2025-timestep</identifier>
<identifier type="doi">10.18653/v1/2025.conll-1.26</identifier>
<location>
<url>https://aclanthology.org/2025.conll-1.26/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>397</start>
<end>406</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Timestep Embeddings Trigger Collapse in Diffusion Text Generation
%A Nosaka, Ryota
%A Matsuzaki, Takuya
%Y Boleda, Gemma
%Y Roth, Michael
%S Proceedings of the 29th Conference on Computational Natural Language Learning
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-271-8
%F nosaka-matsuzaki-2025-timestep
%X Diffusion models, which work by iteratively refining random noise into realistic data, have achieved remarkable success in various generative tasks, particularly in image and audio synthesis. Recent studies have highlighted the potential of diffusion models for text generation, but several challenges remain unresolved. One significant issue is that the model begins to degrade a previous sample rather than improve it after a certain timestep in the generation process, resulting in broken text. In this paper, we reveal that timestep embeddings are a principal cause of the collapse problem by analyzing their interactions with word embeddings. Further, we propose two key methods: (a) a simple lightweight word embedding technique that enhances model analyzability as well as learning efficiency; (b) a novel regularization on both word and timestep embeddings. Experimental results demonstrate that our approach effectively mitigates the collapse problem and can lead to a considerable improvement in the quality of generated text.
%R 10.18653/v1/2025.conll-1.26
%U https://aclanthology.org/2025.conll-1.26/
%U https://doi.org/10.18653/v1/2025.conll-1.26
%P 397-406

Markdown (Informal)

[Timestep Embeddings Trigger Collapse in Diffusion Text Generation](https://aclanthology.org/2025.conll-1.26/) (Nosaka & Matsuzaki, CoNLL 2025)

ACL

Ryota Nosaka and Takuya Matsuzaki. 2025. Timestep Embeddings Trigger Collapse in Diffusion Text Generation. In Proceedings of the 29th Conference on Computational Natural Language Learning, pages 397–406, Vienna, Austria. Association for Computational Linguistics.