@inproceedings{endy-etal-2025-mamba,
title = "Mamba Knockout for Unraveling Factual Information Flow",
author = "Endy, Nir and
Grosbard, Idan Daniel and
Ran-Milo, Yuval and
Slutzky, Yonatan and
Tshuva, Itay and
Giryes, Raja",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.1143/",
doi = "10.18653/v1/2025.acl-long.1143",
pages = "23457--23477",
ISBN = "979-8-89176-251-0",
abstract = "This paper investigates the flow of factual information in Mamba State-Space Model (SSM)-based language models. We rely on theoretical and empirical connections to Transformer-based architectures and their attention mechanisms. Exploiting this relationship, we adapt attentional interpretability techniques originally developed for Transformers{---}specifically, the Attention Knockout methodology{---}to both Mamba-1 and Mamba-2. Using them we trace how information is transmitted and localized across tokens and layers, revealing patterns of subject-token information emergence and layer-wise dynamics. Notably, some phenomena vary between mamba models and Transformer based models, while others appear universally across all models inspected{---}hinting that these may be inherent to LLMs in general. By further leveraging Mamba{'}s structured factorization, we disentangle how distinct ``features'' either enable token-to-token information exchange or enrich individual tokens, thus offering a unified lens to understand Mamba internal operations."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="endy-etal-2025-mamba">
    <titleInfo>
      <title>Mamba Knockout for Unraveling Factual Information Flow</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Nir</namePart>
      <namePart type="family">Endy</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Idan</namePart>
      <namePart type="given">Daniel</namePart>
      <namePart type="family">Grosbard</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yuval</namePart>
      <namePart type="family">Ran-Milo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yonatan</namePart>
      <namePart type="family">Slutzky</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Itay</namePart>
      <namePart type="family">Tshuva</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Raja</namePart>
      <namePart type="family">Giryes</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Wanxiang</namePart>
        <namePart type="family">Che</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joyce</namePart>
        <namePart type="family">Nabende</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Shutova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohammad</namePart>
        <namePart type="given">Taher</namePart>
        <namePart type="family">Pilehvar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-251-0</identifier>
    </relatedItem>
    <abstract>This paper investigates the flow of factual information in Mamba State-Space Model (SSM)-based language models. We rely on theoretical and empirical connections to Transformer-based architectures and their attention mechanisms. Exploiting this relationship, we adapt attentional interpretability techniques originally developed for Transformers—specifically, the Attention Knockout methodology—to both Mamba-1 and Mamba-2. Using them, we trace how information is transmitted and localized across tokens and layers, revealing patterns of subject-token information emergence and layer-wise dynamics. Notably, some phenomena vary between Mamba models and Transformer-based models, while others appear universally across all models inspected—hinting that these may be inherent to LLMs in general. By further leveraging Mamba’s structured factorization, we disentangle how distinct “features” either enable token-to-token information exchange or enrich individual tokens, thus offering a unified lens to understand Mamba’s internal operations.</abstract>
    <identifier type="citekey">endy-etal-2025-mamba</identifier>
    <identifier type="doi">10.18653/v1/2025.acl-long.1143</identifier>
    <location>
      <url>https://aclanthology.org/2025.acl-long.1143/</url>
    </location>
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>23457</start>
        <end>23477</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T Mamba Knockout for Unraveling Factual Information Flow
%A Endy, Nir
%A Grosbard, Idan Daniel
%A Ran-Milo, Yuval
%A Slutzky, Yonatan
%A Tshuva, Itay
%A Giryes, Raja
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F endy-etal-2025-mamba
%X This paper investigates the flow of factual information in Mamba State-Space Model (SSM)-based language models. We rely on theoretical and empirical connections to Transformer-based architectures and their attention mechanisms. Exploiting this relationship, we adapt attentional interpretability techniques originally developed for Transformers—specifically, the Attention Knockout methodology—to both Mamba-1 and Mamba-2. Using them, we trace how information is transmitted and localized across tokens and layers, revealing patterns of subject-token information emergence and layer-wise dynamics. Notably, some phenomena vary between Mamba models and Transformer-based models, while others appear universally across all models inspected—hinting that these may be inherent to LLMs in general. By further leveraging Mamba’s structured factorization, we disentangle how distinct “features” either enable token-to-token information exchange or enrich individual tokens, thus offering a unified lens to understand Mamba’s internal operations.
%R 10.18653/v1/2025.acl-long.1143
%U https://aclanthology.org/2025.acl-long.1143/
%U https://doi.org/10.18653/v1/2025.acl-long.1143
%P 23457-23477

Markdown (Informal)
[Mamba Knockout for Unraveling Factual Information Flow](https://aclanthology.org/2025.acl-long.1143/) (Endy et al., ACL 2025)

ACL
Nir Endy, Idan Daniel Grosbard, Yuval Ran-Milo, Yonatan Slutzky, Itay Tshuva, and Raja Giryes. 2025. Mamba Knockout for Unraveling Factual Information Flow. In *Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)*, pages 23457–23477, Vienna, Austria. Association for Computational Linguistics.
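
For readers who want the method named in the abstract made concrete, below is a minimal sketch of the Attention Knockout operation it refers to: at a chosen layer, the attention edge from a query position to selected key positions is zeroed, and the resulting drop in prediction quality indicates how much factual information flowed along that edge. This is not the authors' code; the tensor shapes, position choices, and renormalization step are assumptions, and applying the idea to Mamba-1/Mamba-2 additionally requires the attention-like matrices the paper derives for SSMs.

```python
import torch

def knockout(attn, src_positions, tgt_position):
    """Zero the attention edges from query position `tgt_position`
    to key positions `src_positions`, then renormalize each row so
    the surviving weights still sum to 1.

    attn: (batch, heads, query_len, key_len) attention weights.
    """
    blocked = attn.clone()
    blocked[:, :, tgt_position, src_positions] = 0.0
    return blocked / blocked.sum(dim=-1, keepdim=True).clamp_min(1e-9)

# Toy demonstration on random attention weights; all names and
# positions here are hypothetical, not taken from the paper.
attn = torch.softmax(torch.randn(1, 4, 6, 6), dim=-1)
subject_span = [1, 2]   # assumed subject-token positions
last_token = 5          # position whose next-token prediction is probed
blocked = knockout(attn, subject_span, last_token)
print(blocked[0, 0, last_token])  # knocked-out entries are zero; row sums to 1
```

In a full experiment one would apply such a knockout at each layer (or window of layers) in turn and compare the probability assigned to the correct attribute token with and without it, which is roughly how token- and layer-wise flow patterns like those described in the abstract are traced.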