% Conference-paper entry for the SparseFit paper (ACL 2024, Volume 1: Long Papers).
% The url field uses the same paper ID as the doi suffix (2024.acl-long.113).
@inproceedings{solano-etal-2024-sparsefit,
  title     = {{SparseFit}: Few-shot Prompting with Sparse Fine-tuning for Jointly Generating Predictions and Natural Language Explanations},
  author    = {Solano, Jesus and
               Sanni, Mardhiyah and
               Camburu, Oana-Maria and
               Minervini, Pasquale},
  editor    = {Ku, Lun-Wei and
               Martins, Andre and
               Srikumar, Vivek},
  booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = aug,
  year      = {2024},
  address   = {Bangkok, Thailand},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.acl-long.113/},
  doi       = {10.18653/v1/2024.acl-long.113},
  pages     = {2053--2077},
  abstract  = {Models that generate natural language explanations (NLEs) for their predictions have recently gained increasing interest. However, this approach usually demands large datasets of human-written NLEs for the ground-truth answers at training time, which can be expensive and potentially infeasible for some applications. When only a few NLEs are available (a few-shot setup), fine-tuning pre-trained language models (PLMs) in conjunction with prompt-based learning has recently shown promising results. However, PLMs typically have billions of parameters, making full fine-tuning expensive. We propose SparseFit, a sparse few-shot fine-tuning strategy that leverages discrete prompts to jointly generate predictions and NLEs. We experiment with SparseFit on three sizes of the T5 language model and four datasets and compare it against existing state-of-the-art Parameter-Efficient Fine-Tuning (PEFT) techniques. We find that fine-tuning only 6.8{\%} of the model parameters leads to competitive results for both the task performance and the quality of the generated NLEs compared to full fine-tuning of the model and produces better results on average than other PEFT methods in terms of predictive accuracy and NLE quality.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the paper with citekey solano-etal-2024-sparsefit. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="solano-etal-2024-sparsefit">
    <titleInfo>
      <title>SparseFit: Few-shot Prompting with Sparse Fine-tuning for Jointly Generating Predictions and Natural Language Explanations</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Jesus</namePart>
      <namePart type="family">Solano</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mardhiyah</namePart>
      <namePart type="family">Sanni</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Oana-Maria</namePart>
      <namePart type="family">Camburu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pasquale</namePart>
      <namePart type="family">Minervini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Lun-Wei</namePart>
        <namePart type="family">Ku</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Andre</namePart>
        <namePart type="family">Martins</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vivek</namePart>
        <namePart type="family">Srikumar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Bangkok, Thailand</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Models that generate natural language explanations (NLEs) for their predictions have recently gained increasing interest. However, this approach usually demands large datasets of human-written NLEs for the ground-truth answers at training time, which can be expensive and potentially infeasible for some applications. When only a few NLEs are available (a few-shot setup), fine-tuning pre-trained language models (PLMs) in conjunction with prompt-based learning has recently shown promising results. However, PLMs typically have billions of parameters, making full fine-tuning expensive. We propose SparseFit, a sparse few-shot fine-tuning strategy that leverages discrete prompts to jointly generate predictions and NLEs. We experiment with SparseFit on three sizes of the T5 language model and four datasets and compare it against existing state-of-the-art Parameter-Efficient Fine-Tuning (PEFT) techniques. We find that fine-tuning only 6.8% of the model parameters leads to competitive results for both the task performance and the quality of the generated NLEs compared to full fine-tuning of the model and produces better results on average than other PEFT methods in terms of predictive accuracy and NLE quality.</abstract>
    <identifier type="citekey">solano-etal-2024-sparsefit</identifier>
    <identifier type="doi">10.18653/v1/2024.acl-long.113</identifier>
    <!-- Landing-page URL; its path segment matches the DOI suffix (2024.acl-long.113). -->
    <location>
      <url>https://aclanthology.org/2024.acl-long.113/</url>
    </location>
    <part>
      <date>2024-08</date>
      <extent unit="page">
        <start>2053</start>
        <end>2077</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T SparseFit: Few-shot Prompting with Sparse Fine-tuning for Jointly Generating Predictions and Natural Language Explanations
%A Solano, Jesus
%A Sanni, Mardhiyah
%A Camburu, Oana-Maria
%A Minervini, Pasquale
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F solano-etal-2024-sparsefit
%X Models that generate natural language explanations (NLEs) for their predictions have recently gained increasing interest. However, this approach usually demands large datasets of human-written NLEs for the ground-truth answers at training time, which can be expensive and potentially infeasible for some applications. When only a few NLEs are available (a few-shot setup), fine-tuning pre-trained language models (PLMs) in conjunction with prompt-based learning has recently shown promising results. However, PLMs typically have billions of parameters, making full fine-tuning expensive. We propose SparseFit, a sparse few-shot fine-tuning strategy that leverages discrete prompts to jointly generate predictions and NLEs. We experiment with SparseFit on three sizes of the T5 language model and four datasets and compare it against existing state-of-the-art Parameter-Efficient Fine-Tuning (PEFT) techniques. We find that fine-tuning only 6.8% of the model parameters leads to competitive results for both the task performance and the quality of the generated NLEs compared to full fine-tuning of the model and produces better results on average than other PEFT methods in terms of predictive accuracy and NLE quality.
%R 10.18653/v1/2024.acl-long.113
%U https://aclanthology.org/2024.acl-long.113/
%U https://doi.org/10.18653/v1/2024.acl-long.113
%P 2053-2077
Markdown (Informal)
[SparseFit: Few-shot Prompting with Sparse Fine-tuning for Jointly Generating Predictions and Natural Language Explanations](https://aclanthology.org/2024.acl-long.113/) (Solano et al., ACL 2024)
ACL