@inproceedings{zhou-etal-2025-rankadaptor,
title = "{R}ank{A}daptor: Hierarchical Rank Allocation for Efficient Fine-Tuning Pruned {LLM}s via Performance Model",
author = "Zhou, Changhai and
Han, Shijie and
Yang, Lining and
Zhou, Yuhua and
Cheng, Xu and
Wang, Yibin and
Li, Hongguang",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-naacl.321/",
doi = "10.18653/v1/2025.findings-naacl.321",
pages = "5781--5795",
ISBN = "979-8-89176-195-7",
abstract = "The efficient compression of large language models (LLMs) has become increasingly popular. However, recovering the performance of compressed LLMs remains a major challenge. The current practice in LLM compression entails the implementation of structural pruning, complemented by a recovery phase that leverages the Low-Rank Adaptation (LoRA) algorithm. Structural pruning{'}s uneven modification of model architecture, coupled with standard LoRA{'}s fixed configuration allocation across layers in an online pipeline, leads to suboptimal performance in various downstream tasks for pruned models. To address this challenge, we introduce RankAdaptor, a hierarchical rank allocation method that enables efficient fine-tuning of pruned LLMs according to layerwise specific recovery requirements. We employ a performance model that conducts offline meta-learning and online incremental learning to explore optimal rank values for each layer. Comprehensive experiments on popular benchmarks show that RankAdaptor consistently outperforms state-of-the-art methods across a variety of pruning settings and LLM architectures, with improvements ranging from 0.7{\%} to 5.5{\%}."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhou-etal-2025-rankadaptor">
<titleInfo>
<title>RankAdaptor: Hierarchical Rank Allocation for Efficient Fine-Tuning Pruned LLMs via Performance Model</title>
</titleInfo>
<name type="personal">
<namePart type="given">Changhai</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shijie</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lining</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuhua</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xu</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yibin</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongguang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-195-7</identifier>
</relatedItem>
<abstract>The efficient compression of large language models (LLMs) has become increasingly popular. However, recovering the performance of compressed LLMs remains a major challenge. The current practice in LLM compression entails the implementation of structural pruning, complemented by a recovery phase that leverages the Low-Rank Adaptation (LoRA) algorithm. Structural pruning’s uneven modification of model architecture, coupled with standard LoRA’s fixed configuration allocation across layers in an online pipeline, leads to suboptimal performance in various downstream tasks for pruned models. To address this challenge, we introduce RankAdaptor, a hierarchical rank allocation method that enables efficient fine-tuning of pruned LLMs according to layerwise specific recovery requirements. We employ a performance model that conducts offline meta-learning and online incremental learning to explore optimal rank values for each layer. Comprehensive experiments on popular benchmarks show that RankAdaptor consistently outperforms state-of-the-art methods across a variety of pruning settings and LLM architectures, with improvements ranging from 0.7% to 5.5%.</abstract>
<identifier type="citekey">zhou-etal-2025-rankadaptor</identifier>
<identifier type="doi">10.18653/v1/2025.findings-naacl.321</identifier>
<location>
<url>https://aclanthology.org/2025.findings-naacl.321/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>5781</start>
<end>5795</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RankAdaptor: Hierarchical Rank Allocation for Efficient Fine-Tuning Pruned LLMs via Performance Model
%A Zhou, Changhai
%A Han, Shijie
%A Yang, Lining
%A Zhou, Yuhua
%A Cheng, Xu
%A Wang, Yibin
%A Li, Hongguang
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Findings of the Association for Computational Linguistics: NAACL 2025
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-195-7
%F zhou-etal-2025-rankadaptor
%X The efficient compression of large language models (LLMs) has become increasingly popular. However, recovering the performance of compressed LLMs remains a major challenge. The current practice in LLM compression entails the implementation of structural pruning, complemented by a recovery phase that leverages the Low-Rank Adaptation (LoRA) algorithm. Structural pruning’s uneven modification of model architecture, coupled with standard LoRA’s fixed configuration allocation across layers in an online pipeline, leads to suboptimal performance in various downstream tasks for pruned models. To address this challenge, we introduce RankAdaptor, a hierarchical rank allocation method that enables efficient fine-tuning of pruned LLMs according to layerwise specific recovery requirements. We employ a performance model that conducts offline meta-learning and online incremental learning to explore optimal rank values for each layer. Comprehensive experiments on popular benchmarks show that RankAdaptor consistently outperforms state-of-the-art methods across a variety of pruning settings and LLM architectures, with improvements ranging from 0.7% to 5.5%.
%R 10.18653/v1/2025.findings-naacl.321
%U https://aclanthology.org/2025.findings-naacl.321/
%U https://doi.org/10.18653/v1/2025.findings-naacl.321
%P 5781-5795
Markdown (Informal)
[RankAdaptor: Hierarchical Rank Allocation for Efficient Fine-Tuning Pruned LLMs via Performance Model](https://aclanthology.org/2025.findings-naacl.321/) (Zhou et al., Findings 2025)
ACL