@comment{ACL Anthology record for the RSCE paper in the KnowFM 2026 workshop proceedings.}
@inproceedings{kamel-xu-2026-rsce,
  title = "{RSCE}: Training-Free Residual Stream Encoding for Persistent Context Amortization",
  author = "Kamel, Adam and
    Xu, Eric",
  editor = "Chen, Canyu and
    Zhang, Yuji and
    Li, Zoey Sha and
    Wang, Zihan and
    Wang, Qineng and
    Su, Jinyan and
    Kargupta, Priyanka and
    Marjanovi{\'c}, Sara Vera and
    Pan, Jeff Z. and
    Bansal, Mohit and
    Augenstein, Isabelle and
    Han, Jiawei and
    Ji, Heng and
    Li, Manling",
  booktitle = "Proceedings of the 4th Workshop on Towards Knowledgeable Foundation Models ({K}now{FM} 2026)",
  month = jul,
  year = "2026",
  address = "San Diego, California, United States",
  publisher = "Association for Computational Linguistics",
  url = "https://aclanthology.org/2026.knowfm-1.11/",
  pages = "138--146",
  isbn = "979-8-89176-403-3",
  abstract = "A central question in the knowledge lifecycle of language models is how externally injected signals interact with parametric memory accumulated during pretraining. We address this through Residual Stream Context Encoding (RSCE), a training-free method that encodes a context document $ctx$ into a single vector $C \in \mathbb{R}^{d_M}$ via mean-pooling residual stream activations at a calibrated intermediate layer, then injects $C$ as an additive shift at query time. This replaces $O(|T(ctx)|)$ attention prefill with an $O(1)$ operation and reveals a previously undescribed \textit{dual-pathway interference} effect: vector injection alone suppresses parametric recall \textit{below} the question-only baseline across four of five tested architectures. This finding{---}absent in behavioral activation steering{---}provides mechanistic evidence that LLMs maintain separate contextual-retrieval and parametric-recall pathways that compete when externally injected signals are semantically rich but token-precision deficient. A dual-channel design pairing $C$ with a compact explicit fact block $F$ resolves this tension. We evaluate five decoder-only architectures (7B{--}70B) on multi-document QA (LongBench, $n=108$) and six on cross-file code completion (RepoBench-C), comparing against LongLLMLingua and EHPC. At extreme compression ($\sim$99{\%} token reduction), RSCE Vec+F is competitive with EHPC on smaller architectures (LLaMA-8B F1 0.333 vs. EHPC 0.334; DeepSeek-14B both 0.214) while both substantially outperform LongLLMLingua. RSCE is the only method achieving 81{\%} compression at 100{\%} operational reliability on code."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kamel-xu-2026-rsce">
<titleInfo>
<title>RSCE: Training-Free Residual Stream Encoding for Persistent Context Amortization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Kamel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Towards Knowledgeable Foundation Models (KnowFM 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Canyu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuji</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zoey</namePart>
<namePart type="given">Sha</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zihan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qineng</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinyan</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Priyanka</namePart>
<namePart type="family">Kargupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="given">Vera</namePart>
<namePart type="family">Marjanović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeff</namePart>
<namePart type="given">Z</namePart>
<namePart type="family">Pan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiawei</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manling</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-403-3</identifier>
</relatedItem>
<abstract>A central question in the knowledge lifecycle of language models is how externally injected signals interact with parametric memory accumulated during pretraining. We address this through Residual Stream Context Encoding (RSCE), a training-free method that encodes a context document ctx into a single vector C ∈ ℝ^{d_M} via mean-pooling residual stream activations at a calibrated intermediate layer, then injects C as an additive shift at query time. This replaces O(|T(ctx)|) attention prefill with an O(1) operation and reveals a previously undescribed dual-pathway interference effect: vector injection alone suppresses parametric recall below the question-only baseline across four of five tested architectures. This finding—absent in behavioral activation steering—provides mechanistic evidence that LLMs maintain separate contextual-retrieval and parametric-recall pathways that compete when externally injected signals are semantically rich but token-precision deficient. A dual-channel design pairing C with a compact explicit fact block F resolves this tension. We evaluate five decoder-only architectures (7B–70B) on multi-document QA (LongBench, n=108) and six on cross-file code completion (RepoBench-C), comparing against LongLLMLingua and EHPC. At extreme compression (~99% token reduction), RSCE Vec+F is competitive with EHPC on smaller architectures (LLaMA-8B F1 0.333 vs. EHPC 0.334; DeepSeek-14B both 0.214) while both substantially outperform LongLLMLingua. RSCE is the only method achieving 81% compression at 100% operational reliability on code.</abstract>
<identifier type="citekey">kamel-xu-2026-rsce</identifier>
<location>
<url>https://aclanthology.org/2026.knowfm-1.11/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>138</start>
<end>146</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RSCE: Training-Free Residual Stream Encoding for Persistent Context Amortization
%A Kamel, Adam
%A Xu, Eric
%Y Chen, Canyu
%Y Zhang, Yuji
%Y Li, Zoey Sha
%Y Wang, Zihan
%Y Wang, Qineng
%Y Su, Jinyan
%Y Kargupta, Priyanka
%Y Marjanović, Sara Vera
%Y Pan, Jeff Z.
%Y Bansal, Mohit
%Y Augenstein, Isabelle
%Y Han, Jiawei
%Y Ji, Heng
%Y Li, Manling
%S Proceedings of the 4th Workshop on Towards Knowledgeable Foundation Models (KnowFM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-403-3
%F kamel-xu-2026-rsce
%X A central question in the knowledge lifecycle of language models is how externally injected signals interact with parametric memory accumulated during pretraining. We address this through Residual Stream Context Encoding (RSCE), a training-free method that encodes a context document ctx into a single vector C ∈ ℝ^{d_M} via mean-pooling residual stream activations at a calibrated intermediate layer, then injects C as an additive shift at query time. This replaces O(|T(ctx)|) attention prefill with an O(1) operation and reveals a previously undescribed dual-pathway interference effect: vector injection alone suppresses parametric recall below the question-only baseline across four of five tested architectures. This finding—absent in behavioral activation steering—provides mechanistic evidence that LLMs maintain separate contextual-retrieval and parametric-recall pathways that compete when externally injected signals are semantically rich but token-precision deficient. A dual-channel design pairing C with a compact explicit fact block F resolves this tension. We evaluate five decoder-only architectures (7B–70B) on multi-document QA (LongBench, n=108) and six on cross-file code completion (RepoBench-C), comparing against LongLLMLingua and EHPC. At extreme compression (~99% token reduction), RSCE Vec+F is competitive with EHPC on smaller architectures (LLaMA-8B F1 0.333 vs. EHPC 0.334; DeepSeek-14B both 0.214) while both substantially outperform LongLLMLingua. RSCE is the only method achieving 81% compression at 100% operational reliability on code.
%U https://aclanthology.org/2026.knowfm-1.11/
%P 138-146
Markdown (Informal)
[RSCE: Training-Free Residual Stream Encoding for Persistent Context Amortization](https://aclanthology.org/2026.knowfm-1.11/) (Kamel & Xu, KnowFM 2026)
ACL