BibTeX
@inproceedings{perin-etal-2024-rankmean,
title = "{R}ank{M}ean: Module-Level Importance Score for Merging Fine-tuned {LLM} Models",
author = "Perin, Gabriel and
Chen, Xuxi and
Liu, Shusen and
Kailkhura, Bhavya and
Wang, Zhangyang and
Gallagher, Brian",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-acl.104",
doi = "10.18653/v1/2024.findings-acl.104",
pages = "1776--1782",
abstract = "Traditionally, developing new language models (LMs) capable of addressing multiple tasks involves fine-tuning pre-trained LMs using a wide collection of datasets, a process that often incurs significant computational expenses. Model merging emerges as a cost-effective alternative, allowing the integration of existing models fine-tuned on different tasks into a single model that performs well across all tasks, eliminating the need for additional training. In this paper, we propose RankMean, an algorithm for merging fine-tuned LMs without requiring any downstream data. RankMean determines merging coefficients based on the relative rankings of weight change magnitudes and applies these coefficients for module-wise integration of various fine-tuned models. Our experimental results demonstrate that RankMean outperforms existing baseline methods on multiple benchmarks. The code is available at https://github.com/VITA-Group/RankMean.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="perin-etal-2024-rankmean">
<titleInfo>
<title>RankMean: Module-Level Importance Score for Merging Fine-tuned LLM Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Perin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuxi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shusen</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bhavya</namePart>
<namePart type="family">Kailkhura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhangyang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Gallagher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Traditionally, developing new language models (LMs) capable of addressing multiple tasks involves fine-tuning pre-trained LMs using a wide collection of datasets, a process that often incurs significant computational expenses. Model merging emerges as a cost-effective alternative, allowing the integration of existing models fine-tuned on different tasks into a single model that performs well across all tasks, eliminating the need for additional training. In this paper, we propose RankMean, an algorithm for merging fine-tuned LMs without requiring any downstream data. RankMean determines merging coefficients based on the relative rankings of weight change magnitudes and applies these coefficients for module-wise integration of various fine-tuned models. Our experimental results demonstrate that RankMean outperforms existing baseline methods on multiple benchmarks. The code is available at https://github.com/VITA-Group/RankMean.</abstract>
<identifier type="citekey">perin-etal-2024-rankmean</identifier>
<identifier type="doi">10.18653/v1/2024.findings-acl.104</identifier>
<location>
<url>https://aclanthology.org/2024.findings-acl.104</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>1776</start>
<end>1782</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T RankMean: Module-Level Importance Score for Merging Fine-tuned LLM Models
%A Perin, Gabriel
%A Chen, Xuxi
%A Liu, Shusen
%A Kailkhura, Bhavya
%A Wang, Zhangyang
%A Gallagher, Brian
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F perin-etal-2024-rankmean
%X Traditionally, developing new language models (LMs) capable of addressing multiple tasks involves fine-tuning pre-trained LMs using a wide collection of datasets, a process that often incurs significant computational expenses. Model merging emerges as a cost-effective alternative, allowing the integration of existing models fine-tuned on different tasks into a single model that performs well across all tasks, eliminating the need for additional training. In this paper, we propose RankMean, an algorithm for merging fine-tuned LMs without requiring any downstream data. RankMean determines merging coefficients based on the relative rankings of weight change magnitudes and applies these coefficients for module-wise integration of various fine-tuned models. Our experimental results demonstrate that RankMean outperforms existing baseline methods on multiple benchmarks. The code is available at https://github.com/VITA-Group/RankMean.
%R 10.18653/v1/2024.findings-acl.104
%U https://aclanthology.org/2024.findings-acl.104
%U https://doi.org/10.18653/v1/2024.findings-acl.104
%P 1776-1782
Markdown (Informal)
[RankMean: Module-Level Importance Score for Merging Fine-tuned LLM Models](https://aclanthology.org/2024.findings-acl.104) (Perin et al., Findings 2024)
ACL
Gabriel Perin, Xuxi Chen, Shusen Liu, Bhavya Kailkhura, Zhangyang Wang, and Brian Gallagher. 2024. RankMean: Module-Level Importance Score for Merging Fine-tuned LLM Models. In Findings of the Association for Computational Linguistics: ACL 2024, pages 1776–1782, Bangkok, Thailand. Association for Computational Linguistics.
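
The abstract describes RankMean only at a high level: merging coefficients are derived from the relative rankings of weight change magnitudes and applied module-wise. The sketch below is a rough illustration of that idea, not the authors' implementation; the function name `rank_based_merge`, the mean-absolute-delta score, and the rank normalization are assumptions made for illustration, and the official code at https://github.com/VITA-Group/RankMean is the authoritative reference.

```python
# Minimal, illustrative sketch of rank-based, module-wise merging in the
# spirit of RankMean, assuming PyTorch-style state dicts with matching keys.
# The scoring and normalization here are assumptions, not the paper's exact method.
import torch


def rank_based_merge(base_state, finetuned_states):
    """Merge several fine-tuned checkpoints into one, without downstream data.

    For each module (parameter tensor), each fine-tuned model is scored by the
    magnitude of its weight change relative to the base model; the scores'
    relative ranks are normalized into merging coefficients, which weight a
    module-wise combination of the task vectors (deltas).
    """
    merged = {}
    for name, base_w in base_state.items():
        deltas = [ft[name].float() - base_w.float() for ft in finetuned_states]
        # Score each model's change on this module by its mean absolute delta.
        scores = torch.stack([d.abs().mean() for d in deltas])
        # Rank the scores and normalize the ranks into coefficients that sum
        # to one (larger changes receive larger weight in this sketch).
        ranks = scores.argsort().argsort().float() + 1.0
        coeffs = ranks / ranks.sum()
        merged[name] = base_w.float() + sum(c * d for c, d in zip(coeffs, deltas))
    return merged


# Hypothetical usage with three fine-tuned variants of the same base model:
# merged_state = rank_based_merge(
#     base.state_dict(),
#     [m1.state_dict(), m2.state_dict(), m3.state_dict()],
# )
# base.load_state_dict(merged_state)
```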