@inproceedings{yang-etal-2026-sparse,
  title     = {Sparse Brains are Also Adaptive Brains: Cognitive-Load-Aware Dynamic Activation for {LLMs}},
  author    = {Yang, Yiheng and
               Wang, Yujie and
               Ma, Chi and
               Yu, Lei and
               Chersoni, Emmanuele and
               Huang, Chu-Ren},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'i}s},
  booktitle = {Findings of the {Association for Computational Linguistics}: {EACL} 2026},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-eacl.270/},
  pages     = {5124--5138},
  isbn      = {979-8-89176-386-9},
  abstract  = {Dense large language models (LLMs) face critical efficiency bottlenecks, as they rigidly activate all parameters regardless of input complexity. While existing sparsity methods (static pruning or dynamic activation) partially address this issue, they either lack adaptivity to contextual or model structural demands or incur prohibitive computational overhead. Inspired by the human brain{'}s dual-process mechanisms {---} predictive coding (N400) for backbone sparsity and structural reanalysis (P600) for complex contexts {---} we propose CLADA, a Cognitive-Load-Aware Dynamic Activation framework that synergizes statistical sparsity with semantic adaptability. Our key insight is that LLM activations exhibit two complementary patterns: 1. Global Statistical Sparsity driven by sequence-level prefix information, and 2. Local Semantic Adaptability modulated by cognitive load metrics (e.g., surprisal and entropy). CLADA employs a hierarchical thresholding strategy: a baseline derived from offline error-controlled optimization ensures over 40{\%} sparsity, which is then dynamically adjusted using real-time cognitive signals. Evaluations across six mainstream LLMs and nine benchmarks demonstrate that CLADA achieves 20{\%} average speedup with less than 2{\%} accuracy degradation, outperforming Griffin (over 5{\%} degradation) and TT (negligible speedup). Crucially, we establish the first formal connection between neurolinguistic event-related potential (ERP) components and LLM efficiency mechanisms through multi-level regression analysis ($R^2 = 0.17$), revealing a sparsity{--}adaptation synergy. Requiring no retraining or architectural changes, CLADA provides a deployable solution for resource-aware LLM inference while advancing biologically inspired AI design.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yang-etal-2026-sparse">
<titleInfo>
<title>Sparse Brains are Also Adaptive Brains: Cognitive-Load-Aware Dynamic Activation for LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yiheng</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yujie</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chi</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lei</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emmanuele</namePart>
<namePart type="family">Chersoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chu-Ren</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-386-9</identifier>
</relatedItem>
<abstract>Dense large language models (LLMs) face critical efficiency bottlenecks, as they rigidly activate all parameters regardless of input complexity. While existing sparsity methods (static pruning or dynamic activation) partially address this issue, they either lack adaptivity to contextual or model structural demands or incur prohibitive computational overhead. Inspired by the human brain’s dual-process mechanisms — predictive coding (N400) for backbone sparsity and structural reanalysis (P600) for complex contexts — we propose CLADA, a Cognitive-Load-Aware Dynamic Activation framework that synergizes statistical sparsity with semantic adaptability. Our key insight is that LLM activations exhibit two complementary patterns: 1. Global Statistical Sparsity driven by sequence-level prefix information, and 2. Local Semantic Adaptability modulated by cognitive load metrics (e.g., surprisal and entropy). CLADA employs a hierarchical thresholding strategy: a baseline derived from offline error-controlled optimization ensures over 40% sparsity, which is then dynamically adjusted using real-time cognitive signals. Evaluations across six mainstream LLMs and nine benchmarks demonstrate that CLADA achieves 20% average speedup with less than 2% accuracy degradation, outperforming Griffin (over 5% degradation) and TT (negligible speedup). Crucially, we establish the first formal connection between neurolinguistic event-related potential (ERP) components and LLM efficiency mechanisms through multi-level regression analysis (R² = 0.17), revealing a sparsity–adaptation synergy. Requiring no retraining or architectural changes, CLADA provides a deployable solution for resource-aware LLM inference while advancing biologically inspired AI design.</abstract>
<identifier type="citekey">yang-etal-2026-sparse</identifier>
<location>
<url>https://aclanthology.org/2026.findings-eacl.270/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>5124</start>
<end>5138</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sparse Brains are Also Adaptive Brains: Cognitive-Load-Aware Dynamic Activation for LLMs
%A Yang, Yiheng
%A Wang, Yujie
%A Ma, Chi
%A Yu, Lei
%A Chersoni, Emmanuele
%A Huang, Chu-Ren
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Findings of the Association for Computational Linguistics: EACL 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-386-9
%F yang-etal-2026-sparse
%X Dense large language models (LLMs) face critical efficiency bottlenecks, as they rigidly activate all parameters regardless of input complexity. While existing sparsity methods (static pruning or dynamic activation) partially address this issue, they either lack adaptivity to contextual or model structural demands or incur prohibitive computational overhead. Inspired by the human brain’s dual-process mechanisms — predictive coding (N400) for backbone sparsity and structural reanalysis (P600) for complex contexts — we propose CLADA, a Cognitive-Load-Aware Dynamic Activation framework that synergizes statistical sparsity with semantic adaptability. Our key insight is that LLM activations exhibit two complementary patterns: 1. Global Statistical Sparsity driven by sequence-level prefix information, and 2. Local Semantic Adaptability modulated by cognitive load metrics (e.g., surprisal and entropy). CLADA employs a hierarchical thresholding strategy: a baseline derived from offline error-controlled optimization ensures over 40% sparsity, which is then dynamically adjusted using real-time cognitive signals. Evaluations across six mainstream LLMs and nine benchmarks demonstrate that CLADA achieves 20% average speedup with less than 2% accuracy degradation, outperforming Griffin (over 5% degradation) and TT (negligible speedup). Crucially, we establish the first formal connection between neurolinguistic event-related potential (ERP) components and LLM efficiency mechanisms through multi-level regression analysis (R² = 0.17), revealing a sparsity–adaptation synergy. Requiring no retraining or architectural changes, CLADA provides a deployable solution for resource-aware LLM inference while advancing biologically inspired AI design.
%U https://aclanthology.org/2026.findings-eacl.270/
%P 5124-5138
Markdown (Informal)
[Sparse Brains are Also Adaptive Brains: Cognitive-Load-Aware Dynamic Activation for LLMs](https://aclanthology.org/2026.findings-eacl.270/) (Yang et al., Findings 2026)
ACL