@inproceedings{rakshit-etal-2025-prejudice,
title = "From Prejudice to Parity: A New Approach to Debiasing Large Language Model Word Embeddings",
author = "Rakshit, Aishik and
Singh, Smriti and
Keshari, Shuvam and
Ghosh Chowdhury, Arijit and
Jain, Vinija and
Chadha, Aman",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Di Eugenio, Barbara and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.450/",
pages = "6718--6747",
abstract = "Embeddings play a pivotal role in the efficacy of large language models. They are the bedrock on which these models grasp contextual relationships and foster a more nuanced understanding of language and consequently perform complex tasks that require a fundamental understanding of human language. Given that these embeddings themselves often reflect or exhibit bias, it stands to reason that these models may also inadvertently learn this bias. In this work, we build on the aforementioned seminal work of (CITATION) and (CITATION) and propose \textit{DeepSoftDebias}, an algorithm that uses a neural network to perform {\textquoteleft}soft debiasing'. We exhaustively evaluate this algorithm across a variety of state-of-the-art datasets, accuracy metrics, and challenging NLP tasks. On a wide range of metrics, we find that \textit{DeepSoftDebias} outperforms the current state-of-the-art methods at reducing bias across gender, race, and religion."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rakshit-etal-2025-prejudice">
<titleInfo>
<title>From Prejudice to Parity: A New Approach to Debiasing Large Language Model Word Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aishik</namePart>
<namePart type="family">Rakshit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Smriti</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuvam</namePart>
<namePart type="family">Keshari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arijit</namePart>
<namePart type="family">Ghosh Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vinija</namePart>
<namePart type="family">Jain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aman</namePart>
<namePart type="family">Chadha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Di Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Embeddings play a pivotal role in the efficacy of large language models. They are the bedrock on which these models grasp contextual relationships and foster a more nuanced understanding of language and consequently perform complex tasks that require a fundamental understanding of human language. Given that these embeddings themselves often reflect or exhibit bias, it stands to reason that these models may also inadvertently learn this bias. In this work, we build on the aforementioned seminal work of (CITATION) and (CITATION) and propose DeepSoftDebias, an algorithm that uses a neural network to perform ‘soft debiasing’. We exhaustively evaluate this algorithm across a variety of state-of-the-art datasets, accuracy metrics, and challenging NLP tasks. On a wide range of metrics, we find that DeepSoftDebias outperforms the current state-of-the-art methods at reducing bias across gender, race, and religion.</abstract>
<identifier type="citekey">rakshit-etal-2025-prejudice</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.450/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>6718</start>
<end>6747</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T From Prejudice to Parity: A New Approach to Debiasing Large Language Model Word Embeddings
%A Rakshit, Aishik
%A Singh, Smriti
%A Keshari, Shuvam
%A Ghosh Chowdhury, Arijit
%A Jain, Vinija
%A Chadha, Aman
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Di Eugenio, Barbara
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F rakshit-etal-2025-prejudice
%X Embeddings play a pivotal role in the efficacy of large language models. They are the bedrock on which these models grasp contextual relationships and foster a more nuanced understanding of language and consequently perform complex tasks that require a fundamental understanding of human language. Given that these embeddings themselves often reflect or exhibit bias, it stands to reason that these models may also inadvertently learn this bias. In this work, we build on the aforementioned seminal work of (CITATION) and (CITATION) and propose DeepSoftDebias, an algorithm that uses a neural network to perform ‘soft debiasing’. We exhaustively evaluate this algorithm across a variety of state-of-the-art datasets, accuracy metrics, and challenging NLP tasks. On a wide range of metrics, we find that DeepSoftDebias outperforms the current state-of-the-art methods at reducing bias across gender, race, and religion.
%U https://aclanthology.org/2025.coling-main.450/
%P 6718-6747
Markdown (Informal)
[From Prejudice to Parity: A New Approach to Debiasing Large Language Model Word Embeddings](https://aclanthology.org/2025.coling-main.450/) (Rakshit et al., COLING 2025)
ACL