@inproceedings{cho-etal-2024-heterogeneous,
title = "Heterogeneous {L}o{RA} for Federated Fine-tuning of On-Device Foundation Models",
author = "Cho, Yae Jee and
Liu, Luyang and
Xu, Zheng and
Fahrezi, Aldi and
Joshi, Gauri",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.717",
pages = "12903--12913",
abstract = "Foundation models (FMs) adapt surprisingly well to downstream tasks with fine-tuning. However, their colossal parameter space prohibits their training on resource-constrained edge-devices. For federated fine-tuning, we need to consider the smaller FMs of few billion parameters at most, namely on-device FMs (ODFMs), which can be deployed on-device. Federated fine-tuning of ODFMs has unique challenges non-present in standard fine-tuning: i) ODFMs poorly generalize to downstream tasks due to their limited sizes making proper fine-tuning imperative to their performance, and ii) devices have limited and heterogeneous system capabilities and data that can deter the performance of fine-tuning.Tackling these challenges, we propose HetLoRA, a feasible and effective federated fine-tuning method for ODFMs that leverages the system and data heterogeneity at the edge. HetLoRA allows heterogeneous LoRA ranks across clients for their individual system resources, and efficiently aggregates and distributes these LoRA modules in a data-aware manner by applying rank self-pruning locally and sparsity-weighted aggregation at the server. It combines the advantages of high and low-rank LoRAs, achieving improved convergence speed and final performance compared to homogeneous LoRA. Furthermore, HetLoRA has enhanced computation and communication efficiency compared to full fine-tuning making it more feasible for the edge.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cho-etal-2024-heterogeneous">
<titleInfo>
<title>Heterogeneous LoRA for Federated Fine-tuning of On-Device Foundation Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yae</namePart>
<namePart type="given">Jee</namePart>
<namePart type="family">Cho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luyang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aldi</namePart>
<namePart type="family">Fahrezi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gauri</namePart>
<namePart type="family">Joshi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Foundation models (FMs) adapt surprisingly well to downstream tasks with fine-tuning. However, their colossal parameter space prohibits their training on resource-constrained edge-devices. For federated fine-tuning, we need to consider the smaller FMs of few billion parameters at most, namely on-device FMs (ODFMs), which can be deployed on-device. Federated fine-tuning of ODFMs has unique challenges non-present in standard fine-tuning: i) ODFMs poorly generalize to downstream tasks due to their limited sizes making proper fine-tuning imperative to their performance, and ii) devices have limited and heterogeneous system capabilities and data that can deter the performance of fine-tuning.Tackling these challenges, we propose HetLoRA, a feasible and effective federated fine-tuning method for ODFMs that leverages the system and data heterogeneity at the edge. HetLoRA allows heterogeneous LoRA ranks across clients for their individual system resources, and efficiently aggregates and distributes these LoRA modules in a data-aware manner by applying rank self-pruning locally and sparsity-weighted aggregation at the server. It combines the advantages of high and low-rank LoRAs, achieving improved convergence speed and final performance compared to homogeneous LoRA. Furthermore, HetLoRA has enhanced computation and communication efficiency compared to full fine-tuning making it more feasible for the edge.</abstract>
<identifier type="citekey">cho-etal-2024-heterogeneous</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.717</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>12903</start>
<end>12913</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Heterogeneous LoRA for Federated Fine-tuning of On-Device Foundation Models
%A Cho, Yae Jee
%A Liu, Luyang
%A Xu, Zheng
%A Fahrezi, Aldi
%A Joshi, Gauri
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F cho-etal-2024-heterogeneous
%X Foundation models (FMs) adapt surprisingly well to downstream tasks with fine-tuning. However, their colossal parameter space prohibits their training on resource-constrained edge-devices. For federated fine-tuning, we need to consider the smaller FMs of few billion parameters at most, namely on-device FMs (ODFMs), which can be deployed on-device. Federated fine-tuning of ODFMs has unique challenges non-present in standard fine-tuning: i) ODFMs poorly generalize to downstream tasks due to their limited sizes making proper fine-tuning imperative to their performance, and ii) devices have limited and heterogeneous system capabilities and data that can deter the performance of fine-tuning.Tackling these challenges, we propose HetLoRA, a feasible and effective federated fine-tuning method for ODFMs that leverages the system and data heterogeneity at the edge. HetLoRA allows heterogeneous LoRA ranks across clients for their individual system resources, and efficiently aggregates and distributes these LoRA modules in a data-aware manner by applying rank self-pruning locally and sparsity-weighted aggregation at the server. It combines the advantages of high and low-rank LoRAs, achieving improved convergence speed and final performance compared to homogeneous LoRA. Furthermore, HetLoRA has enhanced computation and communication efficiency compared to full fine-tuning making it more feasible for the edge.
%U https://aclanthology.org/2024.emnlp-main.717
%P 12903-12913
Markdown (Informal)
[Heterogeneous LoRA for Federated Fine-tuning of On-Device Foundation Models](https://aclanthology.org/2024.emnlp-main.717) (Cho et al., EMNLP 2024)
ACL