@inproceedings{braga-etal-2024-adakron,
title = "{A}da{K}ron: An Adapter-based Parameter Efficient Model Tuning with Kronecker Product",
author = "Braga, Marco and
Raganato, Alessandro and
Pasi, Gabriella",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.32",
pages = "350--357",
abstract = "The fine-tuning paradigm has been widely adopted to train neural models tailored for specific tasks. However, the recent upsurge of Large Language Models (LLMs), characterized by billions of parameters, has introduced profound computational challenges to the fine-tuning process. This has fueled intensive research on Parameter-Efficient Fine-Tuning (PEFT) techniques, usually involving the training of a selective subset of the original model parameters. One of the most used approaches is Adapters, which add trainable lightweight layers to the existing pretrained weights. Within this context, we propose AdaKron, an Adapter-based fine-tuning with the Kronecker product. In particular, we leverage the Kronecker product to combine the output of two small networks, resulting in a final vector whose dimension is the product of the dimensions of the individual outputs, allowing us to train only 0.55{\%} of the model{'}s original parameters. We evaluate AdaKron performing a series of experiments on the General Language Understanding Evaluation (GLUE) benchmark, achieving results in the same ballpark as recent state-of-the-art PEFT methods, despite training fewer parameters.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="braga-etal-2024-adakron">
<titleInfo>
<title>AdaKron: An Adapter-based Parameter Efficient Model Tuning with Kronecker Product</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Braga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Raganato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriella</namePart>
<namePart type="family">Pasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The fine-tuning paradigm has been widely adopted to train neural models tailored for specific tasks. However, the recent upsurge of Large Language Models (LLMs), characterized by billions of parameters, has introduced profound computational challenges to the fine-tuning process. This has fueled intensive research on Parameter-Efficient Fine-Tuning (PEFT) techniques, usually involving the training of a selective subset of the original model parameters. One of the most widely used approaches is Adapters, which add trainable lightweight layers to the existing pretrained weights. Within this context, we propose AdaKron, an Adapter-based fine-tuning method built on the Kronecker product. In particular, we leverage the Kronecker product to combine the outputs of two small networks, resulting in a final vector whose dimension is the product of the dimensions of the individual outputs, allowing us to train only 0.55% of the model’s original parameters. We evaluate AdaKron by performing a series of experiments on the General Language Understanding Evaluation (GLUE) benchmark, achieving results in the same ballpark as recent state-of-the-art PEFT methods, despite training fewer parameters.</abstract>
<identifier type="citekey">braga-etal-2024-adakron</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.32</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>350</start>
<end>357</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AdaKron: An Adapter-based Parameter Efficient Model Tuning with Kronecker Product
%A Braga, Marco
%A Raganato, Alessandro
%A Pasi, Gabriella
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F braga-etal-2024-adakron
%X The fine-tuning paradigm has been widely adopted to train neural models tailored for specific tasks. However, the recent upsurge of Large Language Models (LLMs), characterized by billions of parameters, has introduced profound computational challenges to the fine-tuning process. This has fueled intensive research on Parameter-Efficient Fine-Tuning (PEFT) techniques, usually involving the training of a selective subset of the original model parameters. One of the most widely used approaches is Adapters, which add trainable lightweight layers to the existing pretrained weights. Within this context, we propose AdaKron, an Adapter-based fine-tuning method built on the Kronecker product. In particular, we leverage the Kronecker product to combine the outputs of two small networks, resulting in a final vector whose dimension is the product of the dimensions of the individual outputs, allowing us to train only 0.55% of the model’s original parameters. We evaluate AdaKron by performing a series of experiments on the General Language Understanding Evaluation (GLUE) benchmark, achieving results in the same ballpark as recent state-of-the-art PEFT methods, despite training fewer parameters.
%U https://aclanthology.org/2024.lrec-main.32
%P 350-357
Markdown (Informal)
[AdaKron: An Adapter-based Parameter Efficient Model Tuning with Kronecker Product](https://aclanthology.org/2024.lrec-main.32) (Braga et al., LREC-COLING 2024)
ACL
Marco Braga, Alessandro Raganato, and Gabriella Pasi. 2024. AdaKron: An Adapter-based Parameter Efficient Model Tuning with Kronecker Product. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 350–357, Torino, Italia. ELRA and ICCL.
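The abstract describes combining the outputs of two small networks with a Kronecker product, so that the combined vector's dimension is the product of the two output dimensions. The snippet below is a minimal sketch of that dimensional idea only; the hidden size, the two output sizes, and the random initialization are illustrative assumptions, not the configuration or implementation used in the paper.

```python
# Sketch: Kronecker product of two small projections reaches a large dimension.
# All sizes below are assumptions chosen for illustration.
import numpy as np

hidden_dim = 768          # assumed transformer hidden size
d_a, d_b = 32, 24         # assumed output sizes of the two small networks

rng = np.random.default_rng(0)
x = rng.standard_normal(hidden_dim)            # one token representation

# Two small trainable projections (random here, just for shapes)
W_a = 0.02 * rng.standard_normal((d_a, hidden_dim))
W_b = 0.02 * rng.standard_normal((d_b, hidden_dim))

u = W_a @ x                                    # shape (32,)
v = W_b @ x                                    # shape (24,)

# Kronecker product of the two outputs: dimension d_a * d_b = 768
combined = np.kron(u, v)
assert combined.shape == (d_a * d_b,)
```

Because the combined dimension is d_a × d_b, both projections can stay small while still spanning the model's hidden size, which is how the approach keeps the trainable fraction of parameters low (0.55% of the original parameters, per the abstract).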