@inproceedings{sivakumar-etal-2025-bias,
title = "Bias after Prompting: Persistent Discrimination in Large Language Models",
author = "Sivakumar, Nivedha and
Mackraz, Natalie and
Khorshidi, Samira and
Patel, Krishna and
Theobald, Barry-John and
Zappella, Luca and
Apostoloff, Nicholas",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.1008/",
pages = "18568--18593",
ISBN = "979-8-89176-335-7",
abstract = "A dangerous assumption that can be made from prior work on the bias transfer hypothesis (BTH) is that biases do not transfer from pre-trained large language models (LLMs) to adapted models. We invalidate this assumption by studying the BTH in causal models under prompt adaptations, as prompting is an extremely popular and accessible adaptation strategy used in real-world applications. In contrast to prior work, we find that biases can transfer through prompting and that popular prompt-based mitigation methods do not consistently prevent biases from transferring. Specifically, the correlation between intrinsic biases and those after prompt adaptation remained moderate to strong across demographics and tasks: gender (rho {\ensuremath{>}}= 0.94) in co-reference resolution, and for age (rho {\ensuremath{>}}= 0.98), religion (rho {\ensuremath{>}}= 0.69), etc., in question answering. Further, we find that biases remain strongly correlated when varying few-shot composition parameters, such as sample size, stereotypical content, occupational distribution and representational balance (rho {\ensuremath{>}}= 0.90). We evaluate several prompt-based debiasing strategies and find that different approaches have distinct strengths, but none consistently reduce bias transfer across models, tasks or demographics. These results demonstrate that correcting bias, and potentially improving reasoning ability, in intrinsic models may be reliable ways to prevent propagation of biases to downstream tasks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sivakumar-etal-2025-bias">
<titleInfo>
<title>Bias after Prompting: Persistent Discrimination in Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nivedha</namePart>
<namePart type="family">Sivakumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalie</namePart>
<namePart type="family">Mackraz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samira</namePart>
<namePart type="family">Khorshidi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Krishna</namePart>
<namePart type="family">Patel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barry-John</namePart>
<namePart type="family">Theobald</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luca</namePart>
<namePart type="family">Zappella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="family">Apostoloff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>A dangerous assumption that can be made from prior work on the bias transfer hypothesis (BTH) is that biases do not transfer from pre-trained large language models (LLMs) to adapted models. We invalidate this assumption by studying the BTH in causal models under prompt adaptations, as prompting is an extremely popular and accessible adaptation strategy used in real-world applications. In contrast to prior work, we find that biases can transfer through prompting and that popular prompt-based mitigation methods do not consistently prevent biases from transferring. Specifically, the correlation between intrinsic biases and those after prompt adaptation remained moderate to strong across demographics and tasks: gender (rho >= 0.94) in co-reference resolution, and for age (rho >= 0.98), religion (rho >= 0.69), etc., in question answering. Further, we find that biases remain strongly correlated when varying few-shot composition parameters, such as sample size, stereotypical content, occupational distribution and representational balance (rho >= 0.90). We evaluate several prompt-based debiasing strategies and find that different approaches have distinct strengths, but none consistently reduce bias transfer across models, tasks or demographics. These results demonstrate that correcting bias, and potentially improving reasoning ability, in intrinsic models may be reliable ways to prevent propagation of biases to downstream tasks.</abstract>
<identifier type="citekey">sivakumar-etal-2025-bias</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.1008/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>18568</start>
<end>18593</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bias after Prompting: Persistent Discrimination in Large Language Models
%A Sivakumar, Nivedha
%A Mackraz, Natalie
%A Khorshidi, Samira
%A Patel, Krishna
%A Theobald, Barry-John
%A Zappella, Luca
%A Apostoloff, Nicholas
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F sivakumar-etal-2025-bias
%X A dangerous assumption that can be made from prior work on the bias transfer hypothesis (BTH) is that biases do not transfer from pre-trained large language models (LLMs) to adapted models. We invalidate this assumption by studying the BTH in causal models under prompt adaptations, as prompting is an extremely popular and accessible adaptation strategy used in real-world applications. In contrast to prior work, we find that biases can transfer through prompting and that popular prompt-based mitigation methods do not consistently prevent biases from transferring. Specifically, the correlation between intrinsic biases and those after prompt adaptation remained moderate to strong across demographics and tasks: gender (rho >= 0.94) in co-reference resolution, and for age (rho >= 0.98), religion (rho >= 0.69), etc., in question answering. Further, we find that biases remain strongly correlated when varying few-shot composition parameters, such as sample size, stereotypical content, occupational distribution and representational balance (rho >= 0.90). We evaluate several prompt-based debiasing strategies and find that different approaches have distinct strengths, but none consistently reduce bias transfer across models, tasks or demographics. These results demonstrate that correcting bias, and potentially improving reasoning ability, in intrinsic models may be reliable ways to prevent propagation of biases to downstream tasks.
%U https://aclanthology.org/2025.findings-emnlp.1008/
%P 18568-18593
Markdown (Informal)
[Bias after Prompting: Persistent Discrimination in Large Language Models](https://aclanthology.org/2025.findings-emnlp.1008/) (Sivakumar et al., Findings 2025)
ACL
Nivedha Sivakumar, Natalie Mackraz, Samira Khorshidi, Krishna Patel, Barry-John Theobald, Luca Zappella, and Nicholas Apostoloff. 2025. Bias after Prompting: Persistent Discrimination in Large Language Models. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 18568–18593, Suzhou, China. Association for Computational Linguistics.
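
The abstract reports Spearman correlations (rho) between intrinsic bias scores and bias scores measured after prompt adaptation. As a rough illustration only, a minimal sketch of that correlation step is shown below; the bias metrics, prompts, models, and scores used in the paper are not reproduced here, and the arrays in the sketch are hypothetical placeholders.

```python
# Minimal sketch: Spearman's rho between intrinsic bias scores and bias
# scores measured after prompt adaptation, as described in the abstract.
# The numbers below are hypothetical placeholders, NOT values from the paper.
from scipy.stats import spearmanr

# Hypothetical per-item bias scores for one demographic axis.
intrinsic_bias = [0.12, 0.35, 0.08, 0.51, 0.27, 0.44]   # measured on the base LLM
prompted_bias = [0.10, 0.38, 0.05, 0.55, 0.30, 0.41]    # measured after prompt adaptation

rho, p_value = spearmanr(intrinsic_bias, prompted_bias)
print(f"Spearman rho = {rho:.2f} (p = {p_value:.3f})")
# A rho close to 1 would indicate that intrinsic bias largely persists
# through prompting, in the sense the abstract describes.
```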