@inproceedings{munoz-etal-2024-lonas,
title = "{L}o{NAS}: Elastic Low-Rank Adapters for Efficient Large Language Models",
author = "Munoz, Juan Pablo and
Yuan, Jinjie and
Zheng, Yi and
Jain, Nilesh",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.940",
pages = "10760--10776",
abstract = "Large Language Models (LLMs) continue to grow, reaching hundreds of billions of parameters and making it challenging for Deep Learning practitioners with resource-constrained systems to use them, e.g., fine-tuning these models for a downstream task of their interest. Adapters, such as low-rank adapters (LoRA), have been proposed to reduce the number of trainable parameters in a model, reducing memory requirements and enabling smaller systems to fine-tune these models. Orthogonal to this work, Neural Architecture Search (NAS) has been used to discover compressed and more efficient architectures without sacrificing performance compared to similar base models. This paper introduces a novel approach, LoNAS, to use NAS on language models by exploring a search space of elastic low-rank adapters while reducing memory and compute requirements of full-scale NAS, resulting in high-performing compressed models obtained from weight-sharing super-networks. Compared to models fine-tuned with LoRA, these models contain fewer total parameters, reducing the inference time with only minor decreases in accuracy and, in some cases, even improving accuracy. We discuss the limitations of LoNAS and share observations for the research community regarding its generalization capabilities, which have motivated our follow-up work.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="munoz-etal-2024-lonas">
<titleInfo>
<title>LoNAS: Elastic Low-Rank Adapters for Efficient Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="given">Pablo</namePart>
<namePart type="family">Munoz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinjie</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nilesh</namePart>
<namePart type="family">Jain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large Language Models (LLMs) continue to grow, reaching hundreds of billions of parameters and making it challenging for Deep Learning practitioners with resource-constrained systems to use them, e.g., fine-tuning these models for a downstream task of their interest. Adapters, such as low-rank adapters (LoRA), have been proposed to reduce the number of trainable parameters in a model, reducing memory requirements and enabling smaller systems to fine-tune these models. Orthogonal to this work, Neural Architecture Search (NAS) has been used to discover compressed and more efficient architectures without sacrificing performance compared to similar base models. This paper introduces a novel approach, LoNAS, to use NAS on language models by exploring a search space of elastic low-rank adapters while reducing memory and compute requirements of full-scale NAS, resulting in high-performing compressed models obtained from weight-sharing super-networks. Compared to models fine-tuned with LoRA, these models contain fewer total parameters, reducing the inference time with only minor decreases in accuracy and, in some cases, even improving accuracy. We discuss the limitations of LoNAS and share observations for the research community regarding its generalization capabilities, which have motivated our follow-up work.</abstract>
<identifier type="citekey">munoz-etal-2024-lonas</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.940</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>10760</start>
<end>10776</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LoNAS: Elastic Low-Rank Adapters for Efficient Large Language Models
%A Munoz, Juan Pablo
%A Yuan, Jinjie
%A Zheng, Yi
%A Jain, Nilesh
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F munoz-etal-2024-lonas
%X Large Language Models (LLMs) continue to grow, reaching hundreds of billions of parameters and making it challenging for Deep Learning practitioners with resource-constrained systems to use them, e.g., fine-tuning these models for a downstream task of their interest. Adapters, such as low-rank adapters (LoRA), have been proposed to reduce the number of trainable parameters in a model, reducing memory requirements and enabling smaller systems to fine-tune these models. Orthogonal to this work, Neural Architecture Search (NAS) has been used to discover compressed and more efficient architectures without sacrificing performance compared to similar base models. This paper introduces a novel approach, LoNAS, to use NAS on language models by exploring a search space of elastic low-rank adapters while reducing memory and compute requirements of full-scale NAS, resulting in high-performing compressed models obtained from weight-sharing super-networks. Compared to models fine-tuned with LoRA, these models contain fewer total parameters, reducing the inference time with only minor decreases in accuracy and, in some cases, even improving accuracy. We discuss the limitations of LoNAS and share observations for the research community regarding its generalization capabilities, which have motivated our follow-up work.
%U https://aclanthology.org/2024.lrec-main.940
%P 10760-10776
Markdown (Informal)
[LoNAS: Elastic Low-Rank Adapters for Efficient Large Language Models](https://aclanthology.org/2024.lrec-main.940) (Munoz et al., LREC-COLING 2024)
ACL