@inproceedings{husse-spitz-2022-mind,
title = "Mind Your Bias: A Critical Review of Bias Detection Methods for Contextual Language Models",
author = "Husse, Silke and
Spitz, Andreas",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.311",
doi = "10.18653/v1/2022.findings-emnlp.311",
pages = "4212--4234",
abstract = "The awareness and mitigation of biases are of fundamental importance for the fair and transparent use of contextual language models, yet they crucially depend on the accurate detection of biases as a precursor. Consequently, numerous bias detection methods have been proposed, which vary in their approach, the considered type of bias, and the data used for evaluation. However, while most detection methods are derived from the word embedding association test for static word embeddings, the reported results are heterogeneous, inconsistent, and ultimately inconclusive. To address this issue, we conduct a rigorous analysis and comparison of bias detection methods for contextual language models. Our results show that minor design and implementation decisions (or errors) have a substantial and often significant impact on the derived bias scores. Overall, we find the state of the field to be both worse than previously acknowledged due to systematic and propagated errors in implementations, yet better than anticipated since divergent results in the literature homogenize after accounting for implementation errors. Based on our findings, we conclude with a discussion of paths towards more robust and consistent bias detection methods.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="husse-spitz-2022-mind">
<titleInfo>
<title>Mind Your Bias: A Critical Review of Bias Detection Methods for Contextual Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Silke</namePart>
<namePart type="family">Husse</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Spitz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The awareness and mitigation of biases are of fundamental importance for the fair and transparent use of contextual language models, yet they crucially depend on the accurate detection of biases as a precursor. Consequently, numerous bias detection methods have been proposed, which vary in their approach, the considered type of bias, and the data used for evaluation. However, while most detection methods are derived from the word embedding association test for static word embeddings, the reported results are heterogeneous, inconsistent, and ultimately inconclusive. To address this issue, we conduct a rigorous analysis and comparison of bias detection methods for contextual language models. Our results show that minor design and implementation decisions (or errors) have a substantial and often significant impact on the derived bias scores. Overall, we find the state of the field to be both worse than previously acknowledged due to systematic and propagated errors in implementations, yet better than anticipated since divergent results in the literature homogenize after accounting for implementation errors. Based on our findings, we conclude with a discussion of paths towards more robust and consistent bias detection methods.</abstract>
<identifier type="citekey">husse-spitz-2022-mind</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.311</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.311</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>4212</start>
<end>4234</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Mind Your Bias: A Critical Review of Bias Detection Methods for Contextual Language Models
%A Husse, Silke
%A Spitz, Andreas
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F husse-spitz-2022-mind
%X The awareness and mitigation of biases are of fundamental importance for the fair and transparent use of contextual language models, yet they crucially depend on the accurate detection of biases as a precursor. Consequently, numerous bias detection methods have been proposed, which vary in their approach, the considered type of bias, and the data used for evaluation. However, while most detection methods are derived from the word embedding association test for static word embeddings, the reported results are heterogeneous, inconsistent, and ultimately inconclusive. To address this issue, we conduct a rigorous analysis and comparison of bias detection methods for contextual language models. Our results show that minor design and implementation decisions (or errors) have a substantial and often significant impact on the derived bias scores. Overall, we find the state of the field to be both worse than previously acknowledged due to systematic and propagated errors in implementations, yet better than anticipated since divergent results in the literature homogenize after accounting for implementation errors. Based on our findings, we conclude with a discussion of paths towards more robust and consistent bias detection methods.
%R 10.18653/v1/2022.findings-emnlp.311
%U https://aclanthology.org/2022.findings-emnlp.311
%U https://doi.org/10.18653/v1/2022.findings-emnlp.311
%P 4212-4234
Markdown (Informal)
[Mind Your Bias: A Critical Review of Bias Detection Methods for Contextual Language Models](https://aclanthology.org/2022.findings-emnlp.311) (Husse & Spitz, Findings 2022)
ACL