BibTeX
@inproceedings{tal-etal-2022-fewer,
title = "Fewer Errors, but More Stereotypes? The Effect of Model Size on Gender Bias",
author = "Tal, Yarden and
Magar, Inbal and
Schwartz, Roy",
editor = "Hardmeier, Christian and
Basta, Christine and
Costa-juss{\`a}, Marta R. and
Stanovsky, Gabriel and
Gonen, Hila",
booktitle = "Proceedings of the 4th Workshop on Gender Bias in Natural Language Processing (GeBNLP)",
month = jul,
year = "2022",
address = "Seattle, Washington",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.gebnlp-1.13",
doi = "10.18653/v1/2022.gebnlp-1.13",
pages = "112--120",
abstract = "The size of pretrained models is increasing, and so is their performance on a variety of NLP tasks. However, as their memorization capacity grows, they might pick up more social biases. In this work, we examine the connection between model size and its gender bias (specifically, occupational gender bias). We measure bias in three masked language model families (RoBERTa, DeBERTa, and T5) in two setups: directly using a prompt-based method, and using a downstream task (Winogender). We find on the one hand that larger models receive higher bias scores on the former task, but when evaluated on the latter, they make fewer gender errors. To examine these potentially conflicting results, we carefully investigate the behavior of the different models on Winogender. We find that while larger models outperform smaller ones, the probability that their mistakes are caused by gender bias is higher. Moreover, we find that the proportion of stereotypical errors compared to anti-stereotypical ones grows with the model size. Our findings highlight the potential risks that can arise from increasing model size.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tal-etal-2022-fewer">
<titleInfo>
<title>Fewer Errors, but More Stereotypes? The Effect of Model Size on Gender Bias</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yarden</namePart>
<namePart type="family">Tal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Inbal</namePart>
<namePart type="family">Magar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roy</namePart>
<namePart type="family">Schwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Gender Bias in Natural Language Processing (GeBNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Hardmeier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christine</namePart>
<namePart type="family">Basta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Costa-jussà</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Stanovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hila</namePart>
<namePart type="family">Gonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, Washington</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The size of pretrained models is increasing, and so is their performance on a variety of NLP tasks. However, as their memorization capacity grows, they might pick up more social biases. In this work, we examine the connection between model size and its gender bias (specifically, occupational gender bias). We measure bias in three masked language model families (RoBERTa, DeBERTa, and T5) in two setups: directly using a prompt-based method, and using a downstream task (Winogender). We find on the one hand that larger models receive higher bias scores on the former task, but when evaluated on the latter, they make fewer gender errors. To examine these potentially conflicting results, we carefully investigate the behavior of the different models on Winogender. We find that while larger models outperform smaller ones, the probability that their mistakes are caused by gender bias is higher. Moreover, we find that the proportion of stereotypical errors compared to anti-stereotypical ones grows with the model size. Our findings highlight the potential risks that can arise from increasing model size.</abstract>
<identifier type="citekey">tal-etal-2022-fewer</identifier>
<identifier type="doi">10.18653/v1/2022.gebnlp-1.13</identifier>
<location>
<url>https://aclanthology.org/2022.gebnlp-1.13</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>112</start>
<end>120</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Fewer Errors, but More Stereotypes? The Effect of Model Size on Gender Bias
%A Tal, Yarden
%A Magar, Inbal
%A Schwartz, Roy
%Y Hardmeier, Christian
%Y Basta, Christine
%Y Costa-jussà, Marta R.
%Y Stanovsky, Gabriel
%Y Gonen, Hila
%S Proceedings of the 4th Workshop on Gender Bias in Natural Language Processing (GeBNLP)
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, Washington
%F tal-etal-2022-fewer
%X The size of pretrained models is increasing, and so is their performance on a variety of NLP tasks. However, as their memorization capacity grows, they might pick up more social biases. In this work, we examine the connection between model size and its gender bias (specifically, occupational gender bias). We measure bias in three masked language model families (RoBERTa, DeBERTa, and T5) in two setups: directly using a prompt-based method, and using a downstream task (Winogender). We find on the one hand that larger models receive higher bias scores on the former task, but when evaluated on the latter, they make fewer gender errors. To examine these potentially conflicting results, we carefully investigate the behavior of the different models on Winogender. We find that while larger models outperform smaller ones, the probability that their mistakes are caused by gender bias is higher. Moreover, we find that the proportion of stereotypical errors compared to anti-stereotypical ones grows with the model size. Our findings highlight the potential risks that can arise from increasing model size.
%R 10.18653/v1/2022.gebnlp-1.13
%U https://aclanthology.org/2022.gebnlp-1.13
%U https://doi.org/10.18653/v1/2022.gebnlp-1.13
%P 112-120
Markdown (Informal)
[Fewer Errors, but More Stereotypes? The Effect of Model Size on Gender Bias](https://aclanthology.org/2022.gebnlp-1.13) (Tal et al., GeBNLP 2022)
ACL
Yarden Tal, Inbal Magar, and Roy Schwartz. 2022. Fewer Errors, but More Stereotypes? The Effect of Model Size on Gender Bias. In Proceedings of the 4th Workshop on Gender Bias in Natural Language Processing (GeBNLP), pages 112–120, Seattle, Washington. Association for Computational Linguistics.