@inproceedings{schmahl-etal-2020-wikipedia,
title = "Is {W}ikipedia succeeding in reducing gender bias? Assessing changes in gender bias in {W}ikipedia using word embeddings",
author = "Schmahl, Katja Geertruida and
Viering, Tom Julian and
Makrodimitris, Stavros and
Naseri Jahfari, Arman and
Tax, David and
Loog, Marco",
editor = "Bamman, David and
Hovy, Dirk and
Jurgens, David and
O'Connor, Brendan and
Volkova, Svitlana",
booktitle = "Proceedings of the Fourth Workshop on Natural Language Processing and Computational Social Science",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.nlpcss-1.11/",
doi = "10.18653/v1/2020.nlpcss-1.11",
pages = "94--103",
abstract = "Large text corpora used for creating word embeddings (vectors which represent word meanings) often contain stereotypical gender biases. As a result, such unwanted biases will typically also be present in word embeddings derived from such corpora and downstream applications in the field of natural language processing (NLP). To minimize the effect of gender bias in these settings, more insight is needed when it comes to where and how biases manifest themselves in the text corpora employed. This paper contributes by showing how gender bias in word embeddings from Wikipedia has developed over time. Quantifying the gender bias over time shows that art related words have become more female biased. Family and science words have stereotypical biases towards respectively female and male words. These biases seem to have decreased since 2006, but these changes are not more extreme than those seen in random sets of words. Career related words are more strongly associated with male than with female, this difference has only become smaller in recently written articles. These developments provide additional understanding of what can be done to make Wikipedia more gender neutral and how important time of writing can be when considering biases in word embeddings trained from Wikipedia or from other text corpora."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="schmahl-etal-2020-wikipedia">
<titleInfo>
<title>Is Wikipedia succeeding in reducing gender bias? Assessing changes in gender bias in Wikipedia using word embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Katja</namePart>
<namePart type="given">Geertruida</namePart>
<namePart type="family">Schmahl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="given">Julian</namePart>
<namePart type="family">Viering</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stavros</namePart>
<namePart type="family">Makrodimitris</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arman</namePart>
<namePart type="family">Naseri Jahfari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Tax</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Loog</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Natural Language Processing and Computational Social Science</title>
</titleInfo>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Bamman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brendan</namePart>
<namePart type="family">O’Connor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Svitlana</namePart>
<namePart type="family">Volkova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large text corpora used for creating word embeddings (vectors which represent word meanings) often contain stereotypical gender biases. As a result, such unwanted biases will typically also be present in word embeddings derived from such corpora and downstream applications in the field of natural language processing (NLP). To minimize the effect of gender bias in these settings, more insight is needed when it comes to where and how biases manifest themselves in the text corpora employed. This paper contributes by showing how gender bias in word embeddings from Wikipedia has developed over time. Quantifying the gender bias over time shows that art related words have become more female biased. Family and science words have stereotypical biases towards respectively female and male words. These biases seem to have decreased since 2006, but these changes are not more extreme than those seen in random sets of words. Career related words are more strongly associated with male than with female, this difference has only become smaller in recently written articles. These developments provide additional understanding of what can be done to make Wikipedia more gender neutral and how important time of writing can be when considering biases in word embeddings trained from Wikipedia or from other text corpora.</abstract>
<identifier type="citekey">schmahl-etal-2020-wikipedia</identifier>
<identifier type="doi">10.18653/v1/2020.nlpcss-1.11</identifier>
<location>
<url>https://aclanthology.org/2020.nlpcss-1.11/</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>94</start>
<end>103</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Is Wikipedia succeeding in reducing gender bias? Assessing changes in gender bias in Wikipedia using word embeddings
%A Schmahl, Katja Geertruida
%A Viering, Tom Julian
%A Makrodimitris, Stavros
%A Naseri Jahfari, Arman
%A Tax, David
%A Loog, Marco
%Y Bamman, David
%Y Hovy, Dirk
%Y Jurgens, David
%Y O’Connor, Brendan
%Y Volkova, Svitlana
%S Proceedings of the Fourth Workshop on Natural Language Processing and Computational Social Science
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F schmahl-etal-2020-wikipedia
%X Large text corpora used for creating word embeddings (vectors which represent word meanings) often contain stereotypical gender biases. As a result, such unwanted biases will typically also be present in word embeddings derived from such corpora and downstream applications in the field of natural language processing (NLP). To minimize the effect of gender bias in these settings, more insight is needed when it comes to where and how biases manifest themselves in the text corpora employed. This paper contributes by showing how gender bias in word embeddings from Wikipedia has developed over time. Quantifying the gender bias over time shows that art related words have become more female biased. Family and science words have stereotypical biases towards respectively female and male words. These biases seem to have decreased since 2006, but these changes are not more extreme than those seen in random sets of words. Career related words are more strongly associated with male than with female, this difference has only become smaller in recently written articles. These developments provide additional understanding of what can be done to make Wikipedia more gender neutral and how important time of writing can be when considering biases in word embeddings trained from Wikipedia or from other text corpora.
%R 10.18653/v1/2020.nlpcss-1.11
%U https://aclanthology.org/2020.nlpcss-1.11/
%U https://doi.org/10.18653/v1/2020.nlpcss-1.11
%P 94-103
Markdown (Informal)
[Is Wikipedia succeeding in reducing gender bias? Assessing changes in gender bias in Wikipedia using word embeddings](https://aclanthology.org/2020.nlpcss-1.11/) (Schmahl et al., NLP+CSS 2020)
ACL