@inproceedings{moosavi-etal-2022-adaptable,
title = "Adaptable Adapters",
author = "Moosavi, Nafise and
Delfosse, Quentin and
Kersting, Kristian and
Gurevych, Iryna",
editor = "Carpuat, Marine and
de Marneffe, Marie-Catherine and
Meza Ruiz, Ivan Vladimir",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-main.274/",
doi = "10.18653/v1/2022.naacl-main.274",
pages = "3742--3753",
abstract = "State-of-the-art pretrained NLP models contain a hundred million to trillion parameters. Adapters provide a parameter-efficient alternative for the full finetuning in which we can only finetune lightweight neural network layers on top of pretrained weights. Adapter layers are initialized randomly. However, existing work uses the same adapter architecture{---}i.e., the same adapter layer on top of each layer of the pretrained model{---}for every dataset, regardless of the properties of the dataset or the amount of available training data. In this work, we introduce adaptable adapters that contain (1) learning different activation functions for different layers and different input data, and (2) a learnable switch to select and only use the beneficial adapter layers. We show that adaptable adapters achieve on-par performances with the standard adapter architecture while using a considerably smaller number of adapter layers. In addition, we show that the selected adapter architecture by adaptable adapters transfers well across different data settings and similar tasks. We propose to use adaptable adapters for designing efficient and effective adapter architectures. The resulting adapters (a) contain about 50{\%} of the learning parameters of the standard adapter and are therefore more efficient at training and inference, and require less storage space, and (b) achieve considerably higher performances in low-data settings."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="moosavi-etal-2022-adaptable">
<titleInfo>
<title>Adaptable Adapters</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nafise</namePart>
<namePart type="family">Moosavi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Quentin</namePart>
<namePart type="family">Delfosse</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kristian</namePart>
<namePart type="family">Kersting</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="given">Vladimir</namePart>
<namePart type="family">Meza Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>State-of-the-art pretrained NLP models contain from a hundred million to a trillion parameters. Adapters provide a parameter-efficient alternative to full finetuning, in which only lightweight neural network layers are finetuned on top of the pretrained weights. Adapter layers are initialized randomly. However, existing work uses the same adapter architecture—i.e., the same adapter layer on top of each layer of the pretrained model—for every dataset, regardless of the properties of the dataset or the amount of available training data. In this work, we introduce adaptable adapters, which (1) learn different activation functions for different layers and different input data, and (2) include a learnable switch that selects and uses only the beneficial adapter layers. We show that adaptable adapters achieve on-par performance with the standard adapter architecture while using a considerably smaller number of adapter layers. In addition, we show that the adapter architecture selected by adaptable adapters transfers well across different data settings and similar tasks. We propose using adaptable adapters to design efficient and effective adapter architectures. The resulting adapters (a) contain about 50% of the learnable parameters of the standard adapter, and are therefore more efficient at training and inference and require less storage space, and (b) achieve considerably higher performance in low-data settings.</abstract>
<identifier type="citekey">moosavi-etal-2022-adaptable</identifier>
<identifier type="doi">10.18653/v1/2022.naacl-main.274</identifier>
<location>
<url>https://aclanthology.org/2022.naacl-main.274/</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>3742</start>
<end>3753</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Adaptable Adapters
%A Moosavi, Nafise Sadat
%A Delfosse, Quentin
%A Kersting, Kristian
%A Gurevych, Iryna
%Y Carpuat, Marine
%Y de Marneffe, Marie-Catherine
%Y Meza Ruiz, Ivan Vladimir
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F moosavi-etal-2022-adaptable
%X State-of-the-art pretrained NLP models contain from a hundred million to a trillion parameters. Adapters provide a parameter-efficient alternative to full finetuning, in which only lightweight neural network layers are finetuned on top of the pretrained weights. Adapter layers are initialized randomly. However, existing work uses the same adapter architecture—i.e., the same adapter layer on top of each layer of the pretrained model—for every dataset, regardless of the properties of the dataset or the amount of available training data. In this work, we introduce adaptable adapters, which (1) learn different activation functions for different layers and different input data, and (2) include a learnable switch that selects and uses only the beneficial adapter layers. We show that adaptable adapters achieve on-par performance with the standard adapter architecture while using a considerably smaller number of adapter layers. In addition, we show that the adapter architecture selected by adaptable adapters transfers well across different data settings and similar tasks. We propose using adaptable adapters to design efficient and effective adapter architectures. The resulting adapters (a) contain about 50% of the learnable parameters of the standard adapter, and are therefore more efficient at training and inference and require less storage space, and (b) achieve considerably higher performance in low-data settings.
%R 10.18653/v1/2022.naacl-main.274
%U https://aclanthology.org/2022.naacl-main.274/
%U https://doi.org/10.18653/v1/2022.naacl-main.274
%P 3742-3753
Markdown (Informal)
[Adaptable Adapters](https://aclanthology.org/2022.naacl-main.274/) (Moosavi et al., NAACL 2022)
ACL
- Nafise Sadat Moosavi, Quentin Delfosse, Kristian Kersting, and Iryna Gurevych. 2022. Adaptable Adapters. In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pages 3742–3753, Seattle, United States. Association for Computational Linguistics.
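
The abstract describes two mechanisms: per-layer learnable activation functions and a learnable switch that keeps only the beneficial adapter layers. Below is a minimal PyTorch sketch of that general idea; all names (`AdaptableAdapter`, `LearnableActivation`, `switch_logit`) and design choices (a softmax mixture over fixed candidate activations, a sigmoid gate) are illustrative assumptions, not the authors' released implementation, which may differ in the details.

```python
# Hypothetical sketch of an "adaptable adapter" block, assuming PyTorch.
# Names and design choices here are illustrative, not the paper's code.
import torch
import torch.nn as nn


class LearnableActivation(nn.Module):
    """Convex combination of candidate activations with learnable weights."""

    def __init__(self):
        super().__init__()
        self.candidates = nn.ModuleList([nn.ReLU(), nn.GELU(), nn.Tanh()])
        self.weights = nn.Parameter(torch.zeros(len(self.candidates)))

    def forward(self, x):
        mix = torch.softmax(self.weights, dim=0)
        return sum(w * f(x) for w, f in zip(mix, self.candidates))


class AdaptableAdapter(nn.Module):
    """Bottleneck adapter with a learnable on/off switch per layer."""

    def __init__(self, hidden_size: int, bottleneck: int = 64):
        super().__init__()
        self.down = nn.Linear(hidden_size, bottleneck)
        self.act = LearnableActivation()
        self.up = nn.Linear(bottleneck, hidden_size)
        # Scalar switch logit; sigmoid(logit) gates the adapter's contribution.
        self.switch_logit = nn.Parameter(torch.zeros(1))

    def forward(self, hidden_states):
        gate = torch.sigmoid(self.switch_logit)
        delta = self.up(self.act(self.down(hidden_states)))
        # Residual connection: a switched-off adapter reduces to the identity,
        # so layers whose gate stays near zero after training can be pruned,
        # consistent with the ~50% parameter reduction the abstract reports.
        return hidden_states + gate * delta


adapter = AdaptableAdapter(hidden_size=768)
out = adapter(torch.randn(2, 16, 768))
print(out.shape)  # torch.Size([2, 16, 768])
```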