@inproceedings{pang-etal-2023-sharpt,
title = "{S}har{PT}: Shared Latent Space Prompt Tuning",
author = "Pang, Bo and
Yavuz, Semih and
Xiong, Caiming and
Zhou, Yingbo",
editor = "Vlachos, Andreas and
Augenstein, Isabelle",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2023",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-eacl.92",
doi = "10.18653/v1/2023.findings-eacl.92",
pages = "1244--1250",
abstract = "Prompt tuning is an efficient method for adapting large language models, and Soft Prompt Transfer (SPoT) further narrows the gap between prompt tuning and full model tuning by transferring prompts learned from source tasks to target tasks. It is nevertheless difficult and expensive to identify the source task that provides optimal prompts. In this work, we propose to learn a shared latent space which captures a set of basis skills from a mixture of source tasks. Given an instance, its embedding queries the latent space, yielding a basis skill vector. This vector generates soft prompts, via a lightweight prompt generator, which modulates a frozen model. The latent space and prompt transformation are learned end-to-end by training on source tasks. Transfer learning from source tasks to a target task simply amounts to finetuning the prompt generator, accounting for roughly 0.3{\%} parameters of the frozen backbone model, while the shared latent space is also frozen in finetuning. Our approach outperforms prior soft prompt methods by a significant margin on a variety of tasks such as NLI, sentence completion, QA, conference resolution, word sense disambiguation. We also find, on various model scales, our method achieves competitive performance compared to finetuning the full model.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pang-etal-2023-sharpt">
<titleInfo>
<title>SharPT: Shared Latent Space Prompt Tuning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bo</namePart>
<namePart type="family">Pang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Semih</namePart>
<namePart type="family">Yavuz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Caiming</namePart>
<namePart type="family">Xiong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yingbo</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Prompt tuning is an efficient method for adapting large language models, and Soft Prompt Transfer (SPoT) further narrows the gap between prompt tuning and full model tuning by transferring prompts learned from source tasks to target tasks. It is nevertheless difficult and expensive to identify the source task that provides optimal prompts. In this work, we propose to learn a shared latent space which captures a set of basis skills from a mixture of source tasks. Given an instance, its embedding queries the latent space, yielding a basis skill vector. This vector generates soft prompts, via a lightweight prompt generator, which modulates a frozen model. The latent space and prompt transformation are learned end-to-end by training on source tasks. Transfer learning from source tasks to a target task simply amounts to finetuning the prompt generator, accounting for roughly 0.3% parameters of the frozen backbone model, while the shared latent space is also frozen in finetuning. Our approach outperforms prior soft prompt methods by a significant margin on a variety of tasks such as NLI, sentence completion, QA, conference resolution, word sense disambiguation. We also find, on various model scales, our method achieves competitive performance compared to finetuning the full model.</abstract>
<identifier type="citekey">pang-etal-2023-sharpt</identifier>
<identifier type="doi">10.18653/v1/2023.findings-eacl.92</identifier>
<location>
<url>https://aclanthology.org/2023.findings-eacl.92</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>1244</start>
<end>1250</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SharPT: Shared Latent Space Prompt Tuning
%A Pang, Bo
%A Yavuz, Semih
%A Xiong, Caiming
%A Zhou, Yingbo
%Y Vlachos, Andreas
%Y Augenstein, Isabelle
%S Findings of the Association for Computational Linguistics: EACL 2023
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F pang-etal-2023-sharpt
%X Prompt tuning is an efficient method for adapting large language models, and Soft Prompt Transfer (SPoT) further narrows the gap between prompt tuning and full model tuning by transferring prompts learned from source tasks to target tasks. It is nevertheless difficult and expensive to identify the source task that provides optimal prompts. In this work, we propose to learn a shared latent space which captures a set of basis skills from a mixture of source tasks. Given an instance, its embedding queries the latent space, yielding a basis skill vector. This vector generates soft prompts, via a lightweight prompt generator, which modulates a frozen model. The latent space and prompt transformation are learned end-to-end by training on source tasks. Transfer learning from source tasks to a target task simply amounts to finetuning the prompt generator, accounting for roughly 0.3% parameters of the frozen backbone model, while the shared latent space is also frozen in finetuning. Our approach outperforms prior soft prompt methods by a significant margin on a variety of tasks such as NLI, sentence completion, QA, conference resolution, word sense disambiguation. We also find, on various model scales, our method achieves competitive performance compared to finetuning the full model.
%R 10.18653/v1/2023.findings-eacl.92
%U https://aclanthology.org/2023.findings-eacl.92
%U https://doi.org/10.18653/v1/2023.findings-eacl.92
%P 1244-1250
Markdown (Informal)
[SharPT: Shared Latent Space Prompt Tuning](https://aclanthology.org/2023.findings-eacl.92) (Pang et al., Findings 2023)
ACL
- Bo Pang, Semih Yavuz, Caiming Xiong, and Yingbo Zhou. 2023. SharPT: Shared Latent Space Prompt Tuning. In Findings of the Association for Computational Linguistics: EACL 2023, pages 1244–1250, Dubrovnik, Croatia. Association for Computational Linguistics.