@article{kumar-etal-2020-nurse,
    title = "Nurse is Closer to Woman than Surgeon? Mitigating Gender-Biased Proximities in Word Embeddings",
    author = "Kumar, Vaibhav and
      Bhotia, Tenzin Singhay and
      Kumar, Vaibhav and
      Chakraborty, Tanmoy",
    editor = "Johnson, Mark and
      Roark, Brian and
      Nenkova, Ani",
    journal = "Transactions of the Association for Computational Linguistics",
    volume = "8",
    year = "2020",
    address = "Cambridge, MA",
    publisher = "MIT Press",
    url = "https://aclanthology.org/2020.tacl-1.32",
    doi = "10.1162/tacl_a_00327",
    pages = "486--503",
    abstract = "Word embeddings are the standard model for semantic and syntactic representations of words. Unfortunately, these models have been shown to exhibit undesirable word associations resulting from gender, racial, and religious biases. Existing post-processing methods for debiasing word embeddings are unable to mitigate gender bias hidden in the spatial arrangement of word vectors. In this paper, we propose RAN-Debias, a novel gender debiasing methodology that not only eliminates the bias present in a word vector but also alters the spatial distribution of its neighboring vectors, achieving a bias-free setting while maintaining minimal semantic offset. We also propose a new bias evaluation metric, Gender-based Illicit Proximity Estimate (GIPE), which measures the extent of undue proximity in word vectors resulting from the presence of gender-based predilections. Experiments based on a suite of evaluation metrics show that RAN-Debias significantly outperforms the state-of-the-art in reducing proximity bias (GIPE) by at least 42.02{\%}. It also reduces direct bias, adding minimal semantic disturbance, and achieves the best performance in a downstream application task (coreference resolution).",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kumar-etal-2020-nurse">
    <titleInfo>
        <title>Nurse is Closer to Woman than Surgeon? Mitigating Gender-Biased Proximities in Word Embeddings</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Vaibhav</namePart>
        <namePart type="family">Kumar</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Tenzin</namePart>
        <namePart type="given">Singhay</namePart>
        <namePart type="family">Bhotia</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Vaibhav</namePart>
        <namePart type="family">Kumar</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Tanmoy</namePart>
        <namePart type="family">Chakraborty</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2020</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <relatedItem type="host">
        <titleInfo>
            <title>Transactions of the Association for Computational Linguistics</title>
        </titleInfo>
        <originInfo>
            <issuance>continuing</issuance>
            <publisher>MIT Press</publisher>
            <place>
                <placeTerm type="text">Cambridge, MA</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">periodical</genre>
        <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>Word embeddings are the standard model for semantic and syntactic representations of words. Unfortunately, these models have been shown to exhibit undesirable word associations resulting from gender, racial, and religious biases. Existing post-processing methods for debiasing word embeddings are unable to mitigate gender bias hidden in the spatial arrangement of word vectors. In this paper, we propose RAN-Debias, a novel gender debiasing methodology that not only eliminates the bias present in a word vector but also alters the spatial distribution of its neighboring vectors, achieving a bias-free setting while maintaining minimal semantic offset. We also propose a new bias evaluation metric, Gender-based Illicit Proximity Estimate (GIPE), which measures the extent of undue proximity in word vectors resulting from the presence of gender-based predilections. Experiments based on a suite of evaluation metrics show that RAN-Debias significantly outperforms the state-of-the-art in reducing proximity bias (GIPE) by at least 42.02%. It also reduces direct bias, adding minimal semantic disturbance, and achieves the best performance in a downstream application task (coreference resolution).</abstract>
    <identifier type="citekey">kumar-etal-2020-nurse</identifier>
    <identifier type="doi">10.1162/tacl_a_00327</identifier>
    <location>
        <url>https://aclanthology.org/2020.tacl-1.32</url>
    </location>
    <part>
        <date>2020</date>
        <detail type="volume"><number>8</number></detail>
        <extent unit="page">
            <start>486</start>
            <end>503</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Journal Article
%T Nurse is Closer to Woman than Surgeon? Mitigating Gender-Biased Proximities in Word Embeddings
%A Kumar, Vaibhav
%A Bhotia, Tenzin Singhay
%A Kumar, Vaibhav
%A Chakraborty, Tanmoy
%J Transactions of the Association for Computational Linguistics
%D 2020
%V 8
%I MIT Press
%C Cambridge, MA
%F kumar-etal-2020-nurse
%X Word embeddings are the standard model for semantic and syntactic representations of words. Unfortunately, these models have been shown to exhibit undesirable word associations resulting from gender, racial, and religious biases. Existing post-processing methods for debiasing word embeddings are unable to mitigate gender bias hidden in the spatial arrangement of word vectors. In this paper, we propose RAN-Debias, a novel gender debiasing methodology that not only eliminates the bias present in a word vector but also alters the spatial distribution of its neighboring vectors, achieving a bias-free setting while maintaining minimal semantic offset. We also propose a new bias evaluation metric, Gender-based Illicit Proximity Estimate (GIPE), which measures the extent of undue proximity in word vectors resulting from the presence of gender-based predilections. Experiments based on a suite of evaluation metrics show that RAN-Debias significantly outperforms the state-of-the-art in reducing proximity bias (GIPE) by at least 42.02%. It also reduces direct bias, adding minimal semantic disturbance, and achieves the best performance in a downstream application task (coreference resolution).
%R 10.1162/tacl_a_00327
%U https://aclanthology.org/2020.tacl-1.32
%U https://doi.org/10.1162/tacl_a_00327
%P 486-503
Markdown (Informal)
[Nurse is Closer to Woman than Surgeon? Mitigating Gender-Biased Proximities in Word Embeddings](https://aclanthology.org/2020.tacl-1.32) (Kumar et al., TACL 2020)
ACL
Vaibhav Kumar, Tenzin Singhay Bhotia, Vaibhav Kumar, and Tanmoy Chakraborty. 2020. Nurse is Closer to Woman than Surgeon? Mitigating Gender-Biased Proximities in Word Embeddings. Transactions of the Association for Computational Linguistics, 8:486–503.
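
As a rough illustration of the proximity bias the abstract describes, the sketch below probes whether occupation words sit closer to one gendered word than another, and how much of a word's similarity to its nearest neighbors survives after projecting out a gender direction. This is not the paper's RAN-Debias or GIPE implementation; it is a minimal sketch assuming gensim is installed, off-the-shelf GloVe vectors, and a single she–he axis standing in for the paper's gender direction.

```python
# Toy probe of gender-biased proximities in word embeddings.
# NOT the paper's RAN-Debias or GIPE code: a minimal sketch using
# pretrained GloVe vectors loaded through gensim (pip install gensim).
import numpy as np
import gensim.downloader as api

# Downloads ~130 MB of 100-d GloVe vectors on first use.
vectors = api.load("glove-wiki-gigaword-100")

# 1) Direct proximity gap: is "nurse" closer to "woman" than "surgeon" is?
for occupation in ("nurse", "surgeon"):
    gap = (vectors.similarity(occupation, "woman")
           - vectors.similarity(occupation, "man"))
    print(f"{occupation}: sim(woman) - sim(man) = {gap:+.3f}")

# 2) A crude neighbor check in the spirit of GIPE (assumption: one
# she-he axis stands in for a learned gender direction). For each
# nearest neighbor, compare cosine similarity before and after
# projecting out the gender direction; a large drop suggests the
# proximity is gender-driven rather than semantic.
gender_dir = vectors["she"] - vectors["he"]
gender_dir /= np.linalg.norm(gender_dir)

def degender(v):
    """Remove the component of v along the gender direction."""
    return v - np.dot(v, gender_dir) * gender_dir

def cosine(a, b):
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

word = "nurse"
for neighbor, sim in vectors.most_similar(word, topn=5):
    debiased = cosine(degender(vectors[word]), degender(vectors[neighbor]))
    print(f"{word} ~ {neighbor}: cos = {sim:.3f}, "
          f"de-gendered cos = {debiased:.3f}")
```

The paper itself goes further than this single-axis projection: GIPE aggregates such neighbor-level evidence into a corpus-level estimate, and RAN-Debias also rearranges the neighboring vectors rather than only debiasing the word itself; see the linked paper for the actual definitions.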