@inproceedings{troitskii-etal-2025-internal,
title = "Internal states before wait modulate reasoning patterns",
author = "Troitskii, Dmitrii and
Pal, Koyena and
Wendler, Chris and
McDougall, Callum Stuart",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.1012/",
doi = "10.18653/v1/2025.findings-emnlp.1012",
pages = "18640--18649",
ISBN = "979-8-89176-335-7",
abstract = "Prior work has shown that a significant driver of performance in reasoning models is their ability to reason and self-correct. A distinctive marker in these reasoning traces is the token $\textit{wait}$, which often signals reasoning behavior such as backtracking. Despite being such a complex behavior, little is understood of exactly why models do or do not decide to reason in this particular manner, which limits our understanding of what makes a reasoning model so effective. In this work, we address the question whether model{'}s latents preceding $\textit{wait}$ tokens contain relevant information for modulating the subsequent reasoning process. We train crosscoders at multiple layers of $\texttt{DeepSeek-R1-Distill-Llama-8B}$ and its base version, and introduce a latent attribution technique in the crosscoder setting. We locate a small set of features relevant for promoting/suppressing wait tokens' probabilities. Finally, through a targeted series of experiments analyzing max-activating examples and causal interventions, we show that many of our identified features indeed are relevant for the reasoning process and give rise to different types of reasoning patterns such as restarting from the beginning, recalling prior knowledge, expressing uncertainty, and double-checking."
}
Markdown (Informal)
[Internal states before wait modulate reasoning patterns](https://aclanthology.org/2025.findings-emnlp.1012/) (Troitskii et al., Findings 2025)
ACL
- Dmitrii Troitskii, Koyena Pal, Chris Wendler, and Callum Stuart McDougall. 2025. Internal states before wait modulate reasoning patterns. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 18640–18649, Suzhou, China. Association for Computational Linguistics.