@inproceedings{liu-etal-2026-laco,
title = "{L}a{C}o: Layer-wise Compensation for Pruned Large Language Models",
author = "Liu, Yingen and
Wu, Fan and
Panxuyan and
Li, Ruihui and
Tang, Zhuo and
Li, Kenli",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-long.1342/",
pages = "29099--29113",
ISBN = "979-8-89176-390-6",
abstract = "Pruning is essential for the efficient deployment of Large Language Models (LLMs); however, it causes severe performance degradation due to the structural distortion induced by sparsity. Existing recovery strategies, such as LoRA, predominantly employ global fine-tuning, often overlooking the mechanistic root of this degradation: the layer-wise accumulation and amplification of local errors. To address this limitation, we propose LaCo (Layer-wise Compensation), a framework that reorients the recovery paradigm from global adaptation to hierarchical representation alignment. By sequentially optimizing each layer to reconstruct the model{'}s hidden states, LaCo effectively intercepts the error propagation chain at its source. Extensive experiments demonstrate that LaCo surpasses parameter-efficient baselines in both perplexity reduction and zero-shot reasoning. Notably, it reduces recovery-time memory usage to approximately $1/7$ of the baseline and requires only 2,048 unlabeled samples to match a LoRA model trained on 50k examples{---}achieving a $\sim25\times$ improvement in data efficiency."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2026-laco">
<titleInfo>
<title>LaCo: Layer-wise Compensation for Pruned Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yingen</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fan</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name>
<namePart>Panxuyan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruihui</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhuo</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenli</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-390-6</identifier>
</relatedItem>
<abstract>Pruning is essential for the efficient deployment of Large Language Models (LLMs); however, it causes severe performance degradation due to the structural distortion induced by sparsity. Existing recovery strategies, such as LoRA, predominantly employ global fine-tuning, often overlooking the mechanistic root of this degradation: the layer-wise accumulation and amplification of local errors. To address this limitation, we propose LaCo (Layer-wise Compensation), a framework that reorients the recovery paradigm from global adaptation to hierarchical representation alignment. By sequentially optimizing each layer to reconstruct the model’s hidden states, LaCo effectively intercepts the error propagation chain at its source. Extensive experiments demonstrate that LaCo surpasses parameter-efficient baselines in both perplexity reduction and zero-shot reasoning. Notably, it reduces recovery-time memory usage to approximately 1/7 of the baseline and requires only 2,048 unlabeled samples to match a LoRA model trained on 50k examples—achieving a ~25× improvement in data efficiency.</abstract>
<identifier type="citekey">liu-etal-2026-laco</identifier>
<location>
<url>https://aclanthology.org/2026.acl-long.1342/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>29099</start>
<end>29113</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LaCo: Layer-wise Compensation for Pruned Large Language Models
%A Liu, Yingen
%A Wu, Fan
%A Panxuyan
%A Li, Ruihui
%A Tang, Zhuo
%A Li, Kenli
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F liu-etal-2026-laco
%X Pruning is essential for the efficient deployment of Large Language Models (LLMs); however, it causes severe performance degradation due to the structural distortion induced by sparsity. Existing recovery strategies, such as LoRA, predominantly employ global fine-tuning, often overlooking the mechanistic root of this degradation: the layer-wise accumulation and amplification of local errors. To address this limitation, we propose LaCo (Layer-wise Compensation), a framework that reorients the recovery paradigm from global adaptation to hierarchical representation alignment. By sequentially optimizing each layer to reconstruct the model’s hidden states, LaCo effectively intercepts the error propagation chain at its source. Extensive experiments demonstrate that LaCo surpasses parameter-efficient baselines in both perplexity reduction and zero-shot reasoning. Notably, it reduces recovery-time memory usage to approximately 1/7 of the baseline and requires only 2,048 unlabeled samples to match a LoRA model trained on 50k examples—achieving a ~25× improvement in data efficiency.
%U https://aclanthology.org/2026.acl-long.1342/
%P 29099-29113
Markdown (Informal)
[LaCo: Layer-wise Compensation for Pruned Large Language Models](https://aclanthology.org/2026.acl-long.1342/) (Liu et al., ACL 2026)
ACL
- Yingen Liu, Fan Wu, Panxuyan, Ruihui Li, Zhuo Tang, and Kenli Li. 2026. LaCo: Layer-wise Compensation for Pruned Large Language Models. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 29099–29113, San Diego, California, United States. Association for Computational Linguistics.