@inproceedings{zhao-etal-2025-pruning,
title = "Pruning General Large Language Models into Customized Expert Models",
author = "Zhao, Yiran and
Chen, Guizhen and
Kawaguchi, Kenji and
Bing, Lidong and
Zhang, Wenxuan",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.1201/",
doi = "10.18653/v1/2025.findings-acl.1201",
pages = "23377--23391",
ISBN = "979-8-89176-256-5",
abstract = "Large Language Models (LLMs) have transformed natural language processing, yet their substantial model sizes often demand significant computational resources. To preserve computing resources and accelerate inference speed, it is crucial to prune redundant parameters, especially for experienced users who often need expert models tailored to specific downstream scenarios. However, current pruning methods primarily focus on maintaining models' general capabilities, either requiring extensive post-training or performing poorly due to coarse-grained pruning. In this work, we design a $\underline{Cus}$tom $\underline{Prun}$ing method ($\texttt{Cus-Prun}$) to prune a large general model into a smaller lightweight expert model, which is positioned along the ``language'', ``domain'' and ``task'' dimensions. By identifying and pruning irrelevant neurons of each dimension, $\texttt{Cus-Prun}$ creates expert models without any post-training. Our experiments demonstrate that $\texttt{Cus-Prun}$ consistently outperforms other methods, achieving minimal loss in both expert and general capabilities across various models from different model families and sizes."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhao-etal-2025-pruning">
<titleInfo>
<title>Pruning General Large Language Models into Customized Expert Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yiran</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guizhen</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenji</namePart>
<namePart type="family">Kawaguchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lidong</namePart>
<namePart type="family">Bing</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenxuan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Large Language Models (LLMs) have transformed natural language processing, yet their substantial model sizes often demand significant computational resources. To preserve computing resources and accelerate inference speed, it is crucial to prune redundant parameters, especially for experienced users who often need expert models tailored to specific downstream scenarios. However, current pruning methods primarily focus on maintaining models’ general capabilities, either requiring extensive post-training or performing poorly due to coarse-grained pruning. In this work, we design a Custom Pruning method (Cus-Prun) to prune a large general model into a smaller lightweight expert model, which is positioned along the “language”, “domain” and “task” dimensions. By identifying and pruning irrelevant neurons of each dimension, Cus-Prun creates expert models without any post-training. Our experiments demonstrate that Cus-Prun consistently outperforms other methods, achieving minimal loss in both expert and general capabilities across various models from different model families and sizes.</abstract>
<identifier type="citekey">zhao-etal-2025-pruning</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.1201</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.1201/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>23377</start>
<end>23391</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Pruning General Large Language Models into Customized Expert Models
%A Zhao, Yiran
%A Chen, Guizhen
%A Kawaguchi, Kenji
%A Bing, Lidong
%A Zhang, Wenxuan
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F zhao-etal-2025-pruning
%X Large Language Models (LLMs) have transformed natural language processing, yet their substantial model sizes often demand significant computational resources. To preserve computing resources and accelerate inference speed, it is crucial to prune redundant parameters, especially for experienced users who often need expert models tailored to specific downstream scenarios. However, current pruning methods primarily focus on maintaining models’ general capabilities, either requiring extensive post-training or performing poorly due to coarse-grained pruning. In this work, we design a Custom Pruning method (Cus-Prun) to prune a large general model into a smaller lightweight expert model, which is positioned along the “language”, “domain” and “task” dimensions. By identifying and pruning irrelevant neurons of each dimension, Cus-Prun creates expert models without any post-training. Our experiments demonstrate that Cus-Prun consistently outperforms other methods, achieving minimal loss in both expert and general capabilities across various models from different model families and sizes.
%R 10.18653/v1/2025.findings-acl.1201
%U https://aclanthology.org/2025.findings-acl.1201/
%U https://doi.org/10.18653/v1/2025.findings-acl.1201
%P 23377-23391
Markdown (Informal)
[Pruning General Large Language Models into Customized Expert Models](https://aclanthology.org/2025.findings-acl.1201/) (Zhao et al., Findings 2025)
ACL
Yiran Zhao, Guizhen Chen, Kenji Kawaguchi, Lidong Bing, and Wenxuan Zhang. 2025. Pruning General Large Language Models into Customized Expert Models. In Findings of the Association for Computational Linguistics: ACL 2025, pages 23377–23391, Vienna, Austria. Association for Computational Linguistics.
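
As a rough illustration of the neuron-pruning idea described in the abstract, here is a minimal PyTorch sketch. It is not the paper's actual Cus-Prun procedure: the importance criterion (mean absolute activation), the 75% keep ratio, the layer sizes, and the helper names `neuron_importance` and `prune_neurons` are all assumptions made for the example; the paper defines its own criterion for identifying neurons irrelevant to a given "language", "domain", or "task" dimension.

```python
# Hypothetical sketch of dimension-aware neuron pruning, loosely inspired by
# the abstract above. A simple mean-absolute-activation proxy stands in for
# the paper's actual neuron-relevance criterion.
import torch
import torch.nn as nn
import torch.nn.functional as F

def neuron_importance(layer: nn.Linear, activations: torch.Tensor) -> torch.Tensor:
    """Score each output neuron by its mean |activation| on calibration data.

    activations: (num_tokens, in_features) inputs to the layer, e.g. collected
    from a small corpus representing one dimension (language, domain, or task).
    """
    with torch.no_grad():
        out = F.linear(activations, layer.weight, layer.bias)  # (num_tokens, out_features)
        return out.abs().mean(dim=0)                           # (out_features,)

def prune_neurons(layer: nn.Linear, keep_mask: torch.Tensor) -> nn.Linear:
    """Return a smaller Linear layer keeping only the selected output neurons.

    In a full model, the next layer's input weights would have to be sliced
    with the same indices; this sketch handles a single layer only.
    """
    idx = keep_mask.nonzero(as_tuple=True)[0]
    pruned = nn.Linear(layer.in_features, len(idx), bias=layer.bias is not None)
    with torch.no_grad():
        pruned.weight.copy_(layer.weight[idx])
        if layer.bias is not None:
            pruned.bias.copy_(layer.bias[idx])
    return pruned

# Usage: keep the top 75% of neurons judged relevant to the target dimension.
layer = nn.Linear(4096, 11008)        # e.g. one MLP projection in an LLM block
calib = torch.randn(2048, 4096)       # stand-in for dimension-specific inputs
scores = neuron_importance(layer, calib)
layer_small = prune_neurons(layer, scores >= scores.quantile(0.25))
```

Note that this prunes in one shot with no post-training, mirroring the abstract's claim that Cus-Prun requires none, but the quality of the resulting expert model hinges entirely on how well the importance scores separate relevant from irrelevant neurons.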