@inproceedings{chaloner-maldonado-2019-measuring,
    title = "Measuring Gender Bias in Word Embeddings across Domains and Discovering New Gender Bias Word Categories",
    author = "Chaloner, Kaytlin and
      Maldonado, Alfredo",
    editor = "Costa-juss{\`a}, Marta R. and
      Hardmeier, Christian and
      Radford, Will and
      Webster, Kellie",
    booktitle = "Proceedings of the First Workshop on Gender Bias in Natural Language Processing",
    month = aug,
    year = "2019",
    address = "Florence, Italy",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/W19-3804",
    doi = "10.18653/v1/W19-3804",
    pages = "25--32",
    abstract = "Prior work has shown that word embeddings capture human stereotypes, including gender bias. However, there is a lack of studies testing the presence of specific gender bias categories in word embeddings across diverse domains. This paper aims to fill this gap by applying the WEAT bias detection method to four sets of word embeddings trained on corpora from four different domains: news, social networking, biomedical and a gender-balanced corpus extracted from Wikipedia (GAP). We find that some domains are definitely more prone to gender bias than others, and that the categories of gender bias present also vary for each set of word embeddings. We detect some gender bias in GAP. We also propose a simple but novel method for discovering new bias categories by clustering word embeddings. We validate this method through WEAT{'}s hypothesis testing mechanism and find it useful for expanding the relatively small set of well-known gender bias word categories commonly used in the literature.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="chaloner-maldonado-2019-measuring">
    <titleInfo>
      <title>Measuring Gender Bias in Word Embeddings across Domains and Discovering New Gender Bias Word Categories</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Kaytlin</namePart>
      <namePart type="family">Chaloner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alfredo</namePart>
      <namePart type="family">Maldonado</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First Workshop on Gender Bias in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marta</namePart>
        <namePart type="given">R</namePart>
        <namePart type="family">Costa-jussà</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christian</namePart>
        <namePart type="family">Hardmeier</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Will</namePart>
        <namePart type="family">Radford</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kellie</namePart>
        <namePart type="family">Webster</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Florence, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Prior work has shown that word embeddings capture human stereotypes, including gender bias. However, there is a lack of studies testing the presence of specific gender bias categories in word embeddings across diverse domains. This paper aims to fill this gap by applying the WEAT bias detection method to four sets of word embeddings trained on corpora from four different domains: news, social networking, biomedical and a gender-balanced corpus extracted from Wikipedia (GAP). We find that some domains are definitely more prone to gender bias than others, and that the categories of gender bias present also vary for each set of word embeddings. We detect some gender bias in GAP. We also propose a simple but novel method for discovering new bias categories by clustering word embeddings. We validate this method through WEAT’s hypothesis testing mechanism and find it useful for expanding the relatively small set of well-known gender bias word categories commonly used in the literature.</abstract>
    <identifier type="citekey">chaloner-maldonado-2019-measuring</identifier>
    <identifier type="doi">10.18653/v1/W19-3804</identifier>
    <location>
      <url>https://aclanthology.org/W19-3804</url>
    </location>
    <part>
      <date>2019-08</date>
      <extent unit="page">
        <start>25</start>
        <end>32</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Measuring Gender Bias in Word Embeddings across Domains and Discovering New Gender Bias Word Categories
%A Chaloner, Kaytlin
%A Maldonado, Alfredo
%Y Costa-jussà, Marta R.
%Y Hardmeier, Christian
%Y Radford, Will
%Y Webster, Kellie
%S Proceedings of the First Workshop on Gender Bias in Natural Language Processing
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F chaloner-maldonado-2019-measuring
%X Prior work has shown that word embeddings capture human stereotypes, including gender bias. However, there is a lack of studies testing the presence of specific gender bias categories in word embeddings across diverse domains. This paper aims to fill this gap by applying the WEAT bias detection method to four sets of word embeddings trained on corpora from four different domains: news, social networking, biomedical and a gender-balanced corpus extracted from Wikipedia (GAP). We find that some domains are definitely more prone to gender bias than others, and that the categories of gender bias present also vary for each set of word embeddings. We detect some gender bias in GAP. We also propose a simple but novel method for discovering new bias categories by clustering word embeddings. We validate this method through WEAT’s hypothesis testing mechanism and find it useful for expanding the relatively small set of well-known gender bias word categories commonly used in the literature.
%R 10.18653/v1/W19-3804
%U https://aclanthology.org/W19-3804
%U https://doi.org/10.18653/v1/W19-3804
%P 25-32
Markdown (Informal)
[Measuring Gender Bias in Word Embeddings across Domains and Discovering New Gender Bias Word Categories](https://aclanthology.org/W19-3804) (Chaloner & Maldonado, GeBNLP 2019)
ACL
Kaytlin Chaloner and Alfredo Maldonado. 2019. Measuring Gender Bias in Word Embeddings across Domains and Discovering New Gender Bias Word Categories. In Proceedings of the First Workshop on Gender Bias in Natural Language Processing, pages 25–32, Florence, Italy. Association for Computational Linguistics.