@inproceedings{goddard-etal-2024-arcees,
title = "Arcee{'}s {M}erge{K}it: A Toolkit for Merging Large Language Models",
author = "Goddard, Charles and
Siriwardhana, Shamane and
Ehghaghi, Malikeh and
Meyers, Luke and
Karpukhin, Vladimir and
Benedict, Brian and
McQuade, Mark and
Solawetz, Jacob",
editor = "Dernoncourt, Franck and
Preo{\c{t}}iuc-Pietro, Daniel and
Shimorina, Anastasia",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2024",
address = "Miami, Florida, US",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-industry.36",
doi = "10.18653/v1/2024.emnlp-industry.36",
pages = "477--485",
abstract = "The rapid growth of open-source language models provides the opportunity to merge model checkpoints, combining their parameters to improve performance and versatility. Advances in transfer learning have led to numerous task-specific models, which model merging can integrate into powerful multitask models without additional training. MergeKit is an open-source library designed to support this process with an efficient and extensible framework suitable for any hardware. It has facilitated the merging of thousands of models, contributing to some of the world{'}s most powerful open-source model checkpoints. The library is accessible at: https://github.com/arcee-ai/mergekit.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="goddard-etal-2024-arcees">
<titleInfo>
<title>Arcee’s MergeKit: A Toolkit for Merging Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Charles</namePart>
<namePart type="family">Goddard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shamane</namePart>
<namePart type="family">Siriwardhana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malikeh</namePart>
<namePart type="family">Ehghaghi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luke</namePart>
<namePart type="family">Meyers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vladimir</namePart>
<namePart type="family">Karpukhin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Benedict</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">McQuade</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacob</namePart>
<namePart type="family">Solawetz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Franck</namePart>
<namePart type="family">Dernoncourt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preoţiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anastasia</namePart>
<namePart type="family">Shimorina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, US</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The rapid growth of open-source language models provides the opportunity to merge model checkpoints, combining their parameters to improve performance and versatility. Advances in transfer learning have led to numerous task-specific models, which model merging can integrate into powerful multitask models without additional training. MergeKit is an open-source library designed to support this process with an efficient and extensible framework suitable for any hardware. It has facilitated the merging of thousands of models, contributing to some of the world’s most powerful open-source model checkpoints. The library is accessible at: https://github.com/arcee-ai/mergekit.</abstract>
<identifier type="citekey">goddard-etal-2024-arcees</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-industry.36</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-industry.36</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>477</start>
<end>485</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Arcee’s MergeKit: A Toolkit for Merging Large Language Models
%A Goddard, Charles
%A Siriwardhana, Shamane
%A Ehghaghi, Malikeh
%A Meyers, Luke
%A Karpukhin, Vladimir
%A Benedict, Brian
%A McQuade, Mark
%A Solawetz, Jacob
%Y Dernoncourt, Franck
%Y Preoţiuc-Pietro, Daniel
%Y Shimorina, Anastasia
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, US
%F goddard-etal-2024-arcees
%X The rapid growth of open-source language models provides the opportunity to merge model checkpoints, combining their parameters to improve performance and versatility. Advances in transfer learning have led to numerous task-specific models, which model merging can integrate into powerful multitask models without additional training. MergeKit is an open-source library designed to support this process with an efficient and extensible framework suitable for any hardware. It has facilitated the merging of thousands of models, contributing to some of the world’s most powerful open-source model checkpoints. The library is accessible at: https://github.com/arcee-ai/mergekit.
%R 10.18653/v1/2024.emnlp-industry.36
%U https://aclanthology.org/2024.emnlp-industry.36
%U https://doi.org/10.18653/v1/2024.emnlp-industry.36
%P 477-485
Markdown (Informal)
[Arcee’s MergeKit: A Toolkit for Merging Large Language Models](https://aclanthology.org/2024.emnlp-industry.36) (Goddard et al., EMNLP 2024)
ACL
- Charles Goddard, Shamane Siriwardhana, Malikeh Ehghaghi, Luke Meyers, Vladimir Karpukhin, Brian Benedict, Mark McQuade, and Jacob Solawetz. 2024. Arcee’s MergeKit: A Toolkit for Merging Large Language Models. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 477–485, Miami, Florida, US. Association for Computational Linguistics.