@inproceedings{choi-etal-2023-smop,
title = "{SM}o{P}: Towards Efficient and Effective Prompt Tuning with Sparse Mixture-of-Prompts",
author = "Choi, Joon-Young and
Kim, Junho and
Park, Jun-Hyung and
Mok, Wing-Lam and
Lee, SangKeun",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-main.884",
doi = "10.18653/v1/2023.emnlp-main.884",
pages = "14306--14316",
abstract = "Prompt tuning has emerged as a successful parameter-efficient alternative to the full fine-tuning of language models. However, prior works on prompt tuning often utilize long soft prompts of up to 100 tokens to improve performance, overlooking the inefficiency associated with extended inputs. In this paper, we propose a novel prompt tuning method $SMoP$ ($S$parse $M$ixture-$o$f-$P$rompts) that utilizes short soft prompts for efficient training and inference while maintaining performance gains typically induced from longer soft prompts. To achieve this, $SMoP$ employs a gating mechanism to train multiple short soft prompts specialized in handling different subsets of the data, providing an alternative to relying on a single long soft prompt to cover the entire data. Experimental results demonstrate that $SMoP$ outperforms baseline methods while reducing training and inference costs. We release our code at https://github.com/jyjohnchoi/SMoP.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="choi-etal-2023-smop">
    <titleInfo>
      <title>SMoP: Towards Efficient and Effective Prompt Tuning with Sparse Mixture-of-Prompts</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Joon-Young</namePart>
      <namePart type="family">Choi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Junho</namePart>
      <namePart type="family">Kim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jun-Hyung</namePart>
      <namePart type="family">Park</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wing-Lam</namePart>
      <namePart type="family">Mok</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">SangKeun</namePart>
      <namePart type="family">Lee</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="family">Pino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Prompt tuning has emerged as a successful parameter-efficient alternative to the full fine-tuning of language models. However, prior works on prompt tuning often utilize long soft prompts of up to 100 tokens to improve performance, overlooking the inefficiency associated with extended inputs. In this paper, we propose a novel prompt tuning method SMoP (Sparse Mixture-of-Prompts) that utilizes short soft prompts for efficient training and inference while maintaining performance gains typically induced from longer soft prompts. To achieve this, SMoP employs a gating mechanism to train multiple short soft prompts specialized in handling different subsets of the data, providing an alternative to relying on a single long soft prompt to cover the entire data. Experimental results demonstrate that SMoP outperforms baseline methods while reducing training and inference costs. We release our code at https://github.com/jyjohnchoi/SMoP.</abstract>
    <identifier type="citekey">choi-etal-2023-smop</identifier>
    <identifier type="doi">10.18653/v1/2023.emnlp-main.884</identifier>
    <location>
      <url>https://aclanthology.org/2023.emnlp-main.884</url>
    </location>
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>14306</start>
        <end>14316</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T SMoP: Towards Efficient and Effective Prompt Tuning with Sparse Mixture-of-Prompts
%A Choi, Joon-Young
%A Kim, Junho
%A Park, Jun-Hyung
%A Mok, Wing-Lam
%A Lee, SangKeun
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F choi-etal-2023-smop
%X Prompt tuning has emerged as a successful parameter-efficient alternative to the full fine-tuning of language models. However, prior works on prompt tuning often utilize long soft prompts of up to 100 tokens to improve performance, overlooking the inefficiency associated with extended inputs. In this paper, we propose a novel prompt tuning method SMoP (Sparse Mixture-of-Prompts) that utilizes short soft prompts for efficient training and inference while maintaining performance gains typically induced from longer soft prompts. To achieve this, SMoP employs a gating mechanism to train multiple short soft prompts specialized in handling different subsets of the data, providing an alternative to relying on a single long soft prompt to cover the entire data. Experimental results demonstrate that SMoP outperforms baseline methods while reducing training and inference costs. We release our code at https://github.com/jyjohnchoi/SMoP.
%R 10.18653/v1/2023.emnlp-main.884
%U https://aclanthology.org/2023.emnlp-main.884
%U https://doi.org/10.18653/v1/2023.emnlp-main.884
%P 14306-14316
Markdown (Informal)
[SMoP: Towards Efficient and Effective Prompt Tuning with Sparse Mixture-of-Prompts](https://aclanthology.org/2023.emnlp-main.884) (Choi et al., EMNLP 2023)
ACL
Joon-Young Choi, Junho Kim, Jun-Hyung Park, Wing-Lam Mok, and SangKeun Lee. 2023. SMoP: Towards Efficient and Effective Prompt Tuning with Sparse Mixture-of-Prompts. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pages 14306–14316, Singapore. Association for Computational Linguistics.
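
The abstract describes SMoP's core idea: a gating mechanism routes each input to one of several short soft prompts rather than relying on a single long prompt. As a rough illustration of that routing step, here is a minimal PyTorch sketch; the module name `SparsePromptMixture`, the mean-pooled gating input, the top-1 selection, and all shapes are assumptions of this sketch, not the authors' implementation (see the released code at https://github.com/jyjohnchoi/SMoP for the actual method).

```python
# Illustrative sketch only: a top-1 "sparse mixture-of-prompts" router written
# from the abstract's description. Names, shapes, and the mean-pooled gating
# input are assumptions, not the SMoP authors' implementation.
import torch
import torch.nn as nn


class SparsePromptMixture(nn.Module):
    def __init__(self, num_prompts: int = 4, prompt_len: int = 5, hidden: int = 768):
        super().__init__()
        # Several short soft prompts instead of one long soft prompt.
        self.prompts = nn.Parameter(torch.randn(num_prompts, prompt_len, hidden) * 0.02)
        # Linear gate that scores each prompt for a given input.
        self.gate = nn.Linear(hidden, num_prompts)

    def forward(self, input_embeds: torch.Tensor) -> torch.Tensor:
        # input_embeds: (batch, seq_len, hidden) token embeddings of the input.
        pooled = input_embeds.mean(dim=1)                   # (batch, hidden)
        scores = torch.softmax(self.gate(pooled), dim=-1)   # (batch, num_prompts)
        best = scores.argmax(dim=-1)                        # route each example to one prompt
        chosen = self.prompts[best]                         # (batch, prompt_len, hidden)
        # Scale by the gate probability so the gate receives gradient.
        chosen = chosen * scores.gather(1, best.unsqueeze(1)).unsqueeze(-1)
        # Prepend the selected short prompt to the input embeddings.
        return torch.cat([chosen, input_embeds], dim=1)


if __name__ == "__main__":
    mix = SparsePromptMixture()
    x = torch.randn(2, 16, 768)   # a toy batch of embedded inputs
    print(mix(x).shape)           # torch.Size([2, 21, 768])
```

Prepending a 5-token prompt drawn from a small pool keeps the extended input short, which is the efficiency argument the abstract makes against single soft prompts of up to 100 tokens.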