@inproceedings{azizi-etal-2024-lamda,
title = "{L}a{MDA}: Large Model Fine-Tuning via Spectrally Decomposed Low-Dimensional Adaptation",
author = "Azizi, Seyedarmin and
Kundu, Souvik and
Pedram, Massoud",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.563",
pages = "9635--9646",
abstract = "Low-rank adaptation (LoRA) has become the default approach to fine-tune large language models (LLMs) due to its significant reduction in trainable parameters. However, trainable parameter demand for LoRA increases with increasing model embedding dimensions, leading to high compute costs. Additionally, its backward updates require storing high-dimensional intermediate activations and optimizer states, demanding high peak GPU memory. In this paper, we introduce {\_}LaMDA{\_}, a novel approach to fine-tuning large language models, which leverages low-dimensional adaptation to achieve significant reductions in trainable parameters and peak GPU memory footprint. LaMDA freezes a first projection matrix (PMA) in the adaptation path while introducing a low-dimensional trainable square matrix, resulting in substantial reductions in trainable parameters and peak GPU memory usage. LaMDA gradually freezes a second projection matrix (PMB) during the early fine-tuning stages, reducing the compute cost associated with weight updates to enhance parameter efficiency further.We also present an enhancement, LaMDA++, incorporating a {``}lite-weight{''} adaptive rank allocation for the LoRA path via normalized spectrum analysis of pre-trained model weights. We evaluate LaMDA/LaMDA++ across various tasks, including natural language understanding with the GLUE benchmark, text summarization, natural language generation, and complex reasoning on different LLMs.Results show that LaMDA matches or surpasses the performance of existing alternatives while requiring up to **17.7$\times$** fewer parameter updates and up to **1.32$\times$** lower peak GPU memory usage during fine-tuning. Code will be publicly available at https://github.com/ArminAzizi98/LaMDA.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="azizi-etal-2024-lamda">
<titleInfo>
<title>LaMDA: Large Model Fine-Tuning via Spectrally Decomposed Low-Dimensional Adaptation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Seyedarmin</namePart>
<namePart type="family">Azizi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Souvik</namePart>
<namePart type="family">Kundu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Massoud</namePart>
<namePart type="family">Pedram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
  <abstract>Low-rank adaptation (LoRA) has become the default approach to fine-tune large language models (LLMs) due to its significant reduction in trainable parameters. However, trainable parameter demand for LoRA increases with increasing model embedding dimensions, leading to high compute costs. Additionally, its backward updates require storing high-dimensional intermediate activations and optimizer states, demanding high peak GPU memory. In this paper, we introduce LaMDA, a novel approach to fine-tuning large language models, which leverages low-dimensional adaptation to achieve significant reductions in trainable parameters and peak GPU memory footprint. LaMDA freezes a first projection matrix (PMA) in the adaptation path while introducing a low-dimensional trainable square matrix, resulting in substantial reductions in trainable parameters and peak GPU memory usage. LaMDA gradually freezes a second projection matrix (PMB) during the early fine-tuning stages, reducing the compute cost associated with weight updates to enhance parameter efficiency further. We also present an enhancement, LaMDA++, incorporating a “lite-weight” adaptive rank allocation for the LoRA path via normalized spectrum analysis of pre-trained model weights. We evaluate LaMDA/LaMDA++ across various tasks, including natural language understanding with the GLUE benchmark, text summarization, natural language generation, and complex reasoning on different LLMs. Results show that LaMDA matches or surpasses the performance of existing alternatives while requiring up to 17.7× fewer parameter updates and up to 1.32× lower peak GPU memory usage during fine-tuning. Code will be publicly available at https://github.com/ArminAzizi98/LaMDA.</abstract>
<identifier type="citekey">azizi-etal-2024-lamda</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.563</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>9635</start>
<end>9646</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LaMDA: Large Model Fine-Tuning via Spectrally Decomposed Low-Dimensional Adaptation
%A Azizi, Seyedarmin
%A Kundu, Souvik
%A Pedram, Massoud
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F azizi-etal-2024-lamda
%X Low-rank adaptation (LoRA) has become the default approach to fine-tune large language models (LLMs) due to its significant reduction in trainable parameters. However, trainable parameter demand for LoRA increases with increasing model embedding dimensions, leading to high compute costs. Additionally, its backward updates require storing high-dimensional intermediate activations and optimizer states, demanding high peak GPU memory. In this paper, we introduce LaMDA, a novel approach to fine-tuning large language models, which leverages low-dimensional adaptation to achieve significant reductions in trainable parameters and peak GPU memory footprint. LaMDA freezes a first projection matrix (PMA) in the adaptation path while introducing a low-dimensional trainable square matrix, resulting in substantial reductions in trainable parameters and peak GPU memory usage. LaMDA gradually freezes a second projection matrix (PMB) during the early fine-tuning stages, reducing the compute cost associated with weight updates to enhance parameter efficiency further. We also present an enhancement, LaMDA++, incorporating a “lite-weight” adaptive rank allocation for the LoRA path via normalized spectrum analysis of pre-trained model weights. We evaluate LaMDA/LaMDA++ across various tasks, including natural language understanding with the GLUE benchmark, text summarization, natural language generation, and complex reasoning on different LLMs. Results show that LaMDA matches or surpasses the performance of existing alternatives while requiring up to 17.7× fewer parameter updates and up to 1.32× lower peak GPU memory usage during fine-tuning. Code will be publicly available at https://github.com/ArminAzizi98/LaMDA.
%U https://aclanthology.org/2024.findings-emnlp.563
%P 9635-9646
Markdown (Informal)
[LaMDA: Large Model Fine-Tuning via Spectrally Decomposed Low-Dimensional Adaptation](https://aclanthology.org/2024.findings-emnlp.563) (Azizi et al., Findings 2024)
ACL
Seyedarmin Azizi, Souvik Kundu, and Massoud Pedram. 2024. [LaMDA: Large Model Fine-Tuning via Spectrally Decomposed Low-Dimensional Adaptation](https://aclanthology.org/2024.findings-emnlp.563). In *Findings of the Association for Computational Linguistics: EMNLP 2024*, pages 9635–9646, Miami, Florida, USA. Association for Computational Linguistics.
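
The abstract above describes LaMDA's core mechanism: a frozen first projection (PMA), a small trainable r×r square matrix, and a second projection (PMB) that is trained only early in fine-tuning and then frozen. The sketch below illustrates that adaptation path in PyTorch; it is a minimal illustration under stated assumptions, not the authors' implementation (their code is at the linked GitHub repository), and names such as `LaMDALinear`, the initialization, and the freezing schedule are hypothetical.

```python
# Minimal sketch of a LaMDA-style adaptation path (assumption: PyTorch;
# module and method names are hypothetical, not the authors' released code).
import torch
import torch.nn as nn


class LaMDALinear(nn.Module):
    """y = W x + B S A x, where the pretrained W and the down-projection A
    are frozen, S is a small trainable r x r matrix, and the up-projection B
    is trainable only during the early fine-tuning stage before being frozen."""

    def __init__(self, base: nn.Linear, r: int = 8):
        super().__init__()
        self.base = base
        for p in self.base.parameters():
            p.requires_grad = False  # pretrained weight stays frozen

        d_in, d_out = base.in_features, base.out_features
        # First projection (PMA): r x d_in, frozen from the start
        self.A = nn.Parameter(torch.randn(r, d_in) / d_in**0.5, requires_grad=False)
        # Low-dimensional trainable square matrix: r x r
        self.S = nn.Parameter(torch.zeros(r, r))
        # Second projection (PMB): d_out x r, frozen after the early stage
        self.B = nn.Parameter(torch.randn(d_out, r) / r**0.5)

    def freeze_up_projection(self):
        """Call once the early fine-tuning stage ends (the schedule is a guess)."""
        self.B.requires_grad = False

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Activations along the adapter path stay in the r-dimensional space,
        # which is what reduces trainable parameters and peak activation memory.
        low = x @ self.A.T        # (..., r)
        low = low @ self.S.T      # (..., r)
        update = low @ self.B.T   # (..., d_out)
        return self.base(x) + update
```

Initializing S to zero keeps the adapted layer identical to the pretrained one at the start of fine-tuning, mirroring common LoRA practice; whether LaMDA uses exactly this initialization is an assumption here.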