@inproceedings{aguirre-etal-2024-selecting,
title = "Selecting Shots for Demographic Fairness in Few-Shot Learning with Large Language Models",
author = "Aguirre, Carlos Alejandro and
Sasse, Kuleen and
Cachola, Isabel Alyssa and
Dredze, Mark",
editor = "Dementieva, Daryna and
Ignat, Oana and
Jin, Zhijing and
Mihalcea, Rada and
Piatti, Giorgio and
Tetreault, Joel and
Wilson, Steven and
Zhao, Jieyu",
booktitle = "Proceedings of the Third Workshop on NLP for Positive Impact",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.nlp4pi-1.4",
pages = "50--67",
abstract = "Recently, work in NLP has shifted to few-shot (in-context) learning, with large language models (LLMs) performing well across a range of tasks. However, while fairness evaluations have become a standard for supervised methods, little is known about the fairness of LLMs as prediction systems. Further, common standard methods for fairness involve access to model weights or are applied during finetuning, which are not applicable in few-shot learning. Do LLMs exhibit prediction biases when used for standard NLP tasks?In this work, we analyze the effect of shots, which directly affect the performance of models, on the fairness of LLMs as NLP classification systems. We consider how different shot selection strategies, both existing and new demographically sensitive methods, affect model fairness across three standard fairness datasets. We find that overall the performance of LLMs is not indicative of their fairness, and there is not a single method that fits all scenarios. In light of these facts, we discuss how future work can include LLM fairness in evaluations.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="aguirre-etal-2024-selecting">
<titleInfo>
<title>Selecting Shots for Demographic Fairness in Few-Shot Learning with Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="given">Alejandro</namePart>
<namePart type="family">Aguirre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kuleen</namePart>
<namePart type="family">Sasse</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabel</namePart>
<namePart type="given">Alyssa</namePart>
<namePart type="family">Cachola</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Dredze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on NLP for Positive Impact</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daryna</namePart>
<namePart type="family">Dementieva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oana</namePart>
<namePart type="family">Ignat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhijing</namePart>
<namePart type="family">Jin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rada</namePart>
<namePart type="family">Mihalcea</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giorgio</namePart>
<namePart type="family">Piatti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Tetreault</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Wilson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jieyu</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recently, work in NLP has shifted to few-shot (in-context) learning, with large language models (LLMs) performing well across a range of tasks. However, while fairness evaluations have become a standard for supervised methods, little is known about the fairness of LLMs as prediction systems. Further, common standard methods for fairness involve access to model weights or are applied during finetuning, which are not applicable in few-shot learning. Do LLMs exhibit prediction biases when used for standard NLP tasks?In this work, we analyze the effect of shots, which directly affect the performance of models, on the fairness of LLMs as NLP classification systems. We consider how different shot selection strategies, both existing and new demographically sensitive methods, affect model fairness across three standard fairness datasets. We find that overall the performance of LLMs is not indicative of their fairness, and there is not a single method that fits all scenarios. In light of these facts, we discuss how future work can include LLM fairness in evaluations.</abstract>
<identifier type="citekey">aguirre-etal-2024-selecting</identifier>
<location>
<url>https://aclanthology.org/2024.nlp4pi-1.4</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>50</start>
<end>67</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Selecting Shots for Demographic Fairness in Few-Shot Learning with Large Language Models
%A Aguirre, Carlos Alejandro
%A Sasse, Kuleen
%A Cachola, Isabel Alyssa
%A Dredze, Mark
%Y Dementieva, Daryna
%Y Ignat, Oana
%Y Jin, Zhijing
%Y Mihalcea, Rada
%Y Piatti, Giorgio
%Y Tetreault, Joel
%Y Wilson, Steven
%Y Zhao, Jieyu
%S Proceedings of the Third Workshop on NLP for Positive Impact
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F aguirre-etal-2024-selecting
%X Recently, work in NLP has shifted to few-shot (in-context) learning, with large language models (LLMs) performing well across a range of tasks. However, while fairness evaluations have become a standard for supervised methods, little is known about the fairness of LLMs as prediction systems. Further, common standard methods for fairness involve access to model weights or are applied during finetuning, which are not applicable in few-shot learning. Do LLMs exhibit prediction biases when used for standard NLP tasks?In this work, we analyze the effect of shots, which directly affect the performance of models, on the fairness of LLMs as NLP classification systems. We consider how different shot selection strategies, both existing and new demographically sensitive methods, affect model fairness across three standard fairness datasets. We find that overall the performance of LLMs is not indicative of their fairness, and there is not a single method that fits all scenarios. In light of these facts, we discuss how future work can include LLM fairness in evaluations.
%U https://aclanthology.org/2024.nlp4pi-1.4
%P 50-67
Markdown (Informal)
[Selecting Shots for Demographic Fairness in Few-Shot Learning with Large Language Models](https://aclanthology.org/2024.nlp4pi-1.4) (Aguirre et al., NLP4PI 2024)
ACL