@inproceedings{haque-etal-2025-fine,
title = "Fine-tuning {LLM}s with Cross-Attention-based Weight Decay for Bias Mitigation",
author = "Haque, Farsheed and
Fu, Zhe and
Xu, Depeng and
Yuan, Shuhan and
Niu, Xi",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.854/",
pages = "15785--15798",
ISBN = "979-8-89176-335-7",
abstract = "Large Language Models (LLMs) excel in Natural Language Processing (NLP) tasks but often propagate societal biases from their training data, leading to discriminatory outputs. These biases are amplified by the models' self-attention mechanisms, which disproportionately emphasize biased correlations with sensitive tokens, like ``he'' or ``she'', reflecting the sensitive attributes such as gender and race. To address this issue, we propose a novel fine-tuning method, called Cross-Attention-based Weight Decay (CrAWD), which modifies the LLM architecture to mitigate bias. CrAWD introduces a cross-attention mechanism between an input sequence and a sensitive token sequence, enabling the model to identify and selectively decay the attention weights of tokens associated with sensitive tokens. This reduces the influence of biased association on the model{'}s generation while maintaining task performance. Evaluations on real-world datasets demonstrate the effectiveness of our proposed CrAWD method. Notably, our method can handle multiple sensitive attributes by adjusting the sensitive token sequence, and it does not require full knowledge of sensitive tokens presented in the dataset, underscoring CrAWD{'}s versatility in promoting fair LLMs across various applications."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="haque-etal-2025-fine">
<titleInfo>
<title>Fine-tuning LLMs with Cross-Attention-based Weight Decay for Bias Mitigation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Farsheed</namePart>
<namePart type="family">Haque</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhe</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Depeng</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuhan</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xi</namePart>
<namePart type="family">Niu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Large Language Models (LLMs) excel in Natural Language Processing (NLP) tasks but often propagate societal biases from their training data, leading to discriminatory outputs. These biases are amplified by the models’ self-attention mechanisms, which disproportionately emphasize biased correlations with sensitive tokens, like “he” or “she”, reflecting sensitive attributes such as gender and race. To address this issue, we propose a novel fine-tuning method, called Cross-Attention-based Weight Decay (CrAWD), which modifies the LLM architecture to mitigate bias. CrAWD introduces a cross-attention mechanism between an input sequence and a sensitive token sequence, enabling the model to identify and selectively decay the attention weights of tokens associated with sensitive tokens. This reduces the influence of biased associations on the model’s generation while maintaining task performance. Evaluations on real-world datasets demonstrate the effectiveness of our proposed CrAWD method. Notably, our method can handle multiple sensitive attributes by adjusting the sensitive token sequence, and it does not require full knowledge of the sensitive tokens present in the dataset, underscoring CrAWD’s versatility in promoting fair LLMs across various applications.</abstract>
<identifier type="citekey">haque-etal-2025-fine</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.854/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>15785</start>
<end>15798</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fine-tuning LLMs with Cross-Attention-based Weight Decay for Bias Mitigation
%A Haque, Farsheed
%A Fu, Zhe
%A Xu, Depeng
%A Yuan, Shuhan
%A Niu, Xi
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F haque-etal-2025-fine
%X Large Language Models (LLMs) excel in Natural Language Processing (NLP) tasks but often propagate societal biases from their training data, leading to discriminatory outputs. These biases are amplified by the models’ self-attention mechanisms, which disproportionately emphasize biased correlations with sensitive tokens, like “he” or “she”, reflecting sensitive attributes such as gender and race. To address this issue, we propose a novel fine-tuning method, called Cross-Attention-based Weight Decay (CrAWD), which modifies the LLM architecture to mitigate bias. CrAWD introduces a cross-attention mechanism between an input sequence and a sensitive token sequence, enabling the model to identify and selectively decay the attention weights of tokens associated with sensitive tokens. This reduces the influence of biased associations on the model’s generation while maintaining task performance. Evaluations on real-world datasets demonstrate the effectiveness of our proposed CrAWD method. Notably, our method can handle multiple sensitive attributes by adjusting the sensitive token sequence, and it does not require full knowledge of the sensitive tokens present in the dataset, underscoring CrAWD’s versatility in promoting fair LLMs across various applications.
%U https://aclanthology.org/2025.findings-emnlp.854/
%P 15785-15798
Markdown (Informal)
[Fine-tuning LLMs with Cross-Attention-based Weight Decay for Bias Mitigation](https://aclanthology.org/2025.findings-emnlp.854/) (Haque et al., Findings 2025)
ACL
Farsheed Haque, Zhe Fu, Depeng Xu, Shuhan Yuan, and Xi Niu. 2025. Fine-tuning LLMs with Cross-Attention-based Weight Decay for Bias Mitigation. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 15785–15798, Suzhou, China. Association for Computational Linguistics.
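
The abstract above describes CrAWD only at a high level. Below is a minimal, hypothetical PyTorch sketch of the general idea of using cross-attention against a sensitive-token sequence to decay self-attention weights. The function name, tensor shapes, and the specific decay rule (sigmoid of each token's strongest cross-attention score, applied per key token) are illustrative assumptions, not the authors' implementation.

import torch
import torch.nn.functional as F

def crawd_self_attention(x, sensitive_emb, w_q, w_k, w_v, decay_strength=0.5):
    """Single-head self-attention whose weights are decayed for tokens that
    cross-attend strongly to a sensitive-token sequence (illustrative sketch).

    x:              (seq_len, d_model) input token embeddings
    sensitive_emb:  (n_sens, d_model)  embeddings of sensitive tokens (e.g. "he", "she")
    w_q, w_k, w_v:  (d_model, d_model) projection matrices
    decay_strength: scalar in [0, 1]; 0 = vanilla attention, 1 = full decay
    """
    d = x.size(-1)
    q, k, v = x @ w_q, x @ w_k, x @ w_v

    # Cross-attention between input queries and sensitive-token keys: an
    # association score per input token (assumed: sigmoid of the max score).
    sens_k = sensitive_emb @ w_k
    cross_scores = (q @ sens_k.T) / d ** 0.5                 # (seq_len, n_sens)
    association = torch.sigmoid(cross_scores.max(dim=-1).values)  # (seq_len,)

    # Standard self-attention, then decay the weights pointing at key tokens
    # that are strongly associated with the sensitive tokens, and renormalise.
    self_scores = (q @ k.T) / d ** 0.5                       # (seq_len, seq_len)
    decay = 1.0 - decay_strength * association                # (seq_len,)
    attn = F.softmax(self_scores, dim=-1) * decay.unsqueeze(0)
    attn = attn / attn.sum(dim=-1, keepdim=True)
    return attn @ v

if __name__ == "__main__":
    torch.manual_seed(0)
    seq_len, n_sens, d_model = 8, 2, 16
    x = torch.randn(seq_len, d_model)
    sens = torch.randn(n_sens, d_model)
    w_q, w_k, w_v = (torch.randn(d_model, d_model) * d_model ** -0.5 for _ in range(3))
    out = crawd_self_attention(x, sens, w_q, w_k, w_v)
    print(out.shape)  # torch.Size([8, 16])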