@inproceedings{jeoung-diesner-2022-changed,
title = "What changed? Investigating Debiasing Methods using Causal Mediation Analysis",
author = "Jeoung, Sullam and
Diesner, Jana",
editor = "Hardmeier, Christian and
Basta, Christine and
Costa-juss{\`a}, Marta R. and
Stanovsky, Gabriel and
Gonen, Hila",
booktitle = "Proceedings of the 4th Workshop on Gender Bias in Natural Language Processing (GeBNLP)",
month = jul,
year = "2022",
address = "Seattle, Washington",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.gebnlp-1.26",
doi = "10.18653/v1/2022.gebnlp-1.26",
pages = "255--265",
abstract = "Previous work has examined how debiasing language models affect downstream tasks, specifically, how debiasing techniques influence task performance and whether debiased models also make impartial predictions in downstream tasks or not. However, what we don{'}t understand well yet is why debiasing methods have varying impacts on downstream tasks and how debiasing techniques affect internal components of language models, i.e., neurons, layers, and attentions. In this paper, we decompose the internal mechanisms of debiasing language models with respect to gender by applying causal mediation analysis to understand the influence of debiasing methods on toxicity detection as a downstream task. Our findings suggest a need to test the effectiveness of debiasing methods with different bias metrics, and to focus on changes in the behavior of certain components of the models, e.g., first two layers of language models, and attention heads.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jeoung-diesner-2022-changed">
<titleInfo>
<title>What changed? Investigating Debiasing Methods using Causal Mediation Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sullam</namePart>
<namePart type="family">Jeoung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jana</namePart>
<namePart type="family">Diesner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Gender Bias in Natural Language Processing (GeBNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Hardmeier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christine</namePart>
<namePart type="family">Basta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Costa-jussà</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Stanovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hila</namePart>
<namePart type="family">Gonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, Washington</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Previous work has examined how debiasing language models affect downstream tasks, specifically, how debiasing techniques influence task performance and whether debiased models also make impartial predictions in downstream tasks or not. However, what we don’t understand well yet is why debiasing methods have varying impacts on downstream tasks and how debiasing techniques affect internal components of language models, i.e., neurons, layers, and attentions. In this paper, we decompose the internal mechanisms of debiasing language models with respect to gender by applying causal mediation analysis to understand the influence of debiasing methods on toxicity detection as a downstream task. Our findings suggest a need to test the effectiveness of debiasing methods with different bias metrics, and to focus on changes in the behavior of certain components of the models, e.g., first two layers of language models, and attention heads.</abstract>
<identifier type="citekey">jeoung-diesner-2022-changed</identifier>
<identifier type="doi">10.18653/v1/2022.gebnlp-1.26</identifier>
<location>
<url>https://aclanthology.org/2022.gebnlp-1.26</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>255</start>
<end>265</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T What changed? Investigating Debiasing Methods using Causal Mediation Analysis
%A Jeoung, Sullam
%A Diesner, Jana
%Y Hardmeier, Christian
%Y Basta, Christine
%Y Costa-jussà, Marta R.
%Y Stanovsky, Gabriel
%Y Gonen, Hila
%S Proceedings of the 4th Workshop on Gender Bias in Natural Language Processing (GeBNLP)
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, Washington
%F jeoung-diesner-2022-changed
%X Previous work has examined how debiasing language models affect downstream tasks, specifically, how debiasing techniques influence task performance and whether debiased models also make impartial predictions in downstream tasks or not. However, what we don’t understand well yet is why debiasing methods have varying impacts on downstream tasks and how debiasing techniques affect internal components of language models, i.e., neurons, layers, and attentions. In this paper, we decompose the internal mechanisms of debiasing language models with respect to gender by applying causal mediation analysis to understand the influence of debiasing methods on toxicity detection as a downstream task. Our findings suggest a need to test the effectiveness of debiasing methods with different bias metrics, and to focus on changes in the behavior of certain components of the models, e.g., first two layers of language models, and attention heads.
%R 10.18653/v1/2022.gebnlp-1.26
%U https://aclanthology.org/2022.gebnlp-1.26
%U https://doi.org/10.18653/v1/2022.gebnlp-1.26
%P 255-265
Markdown (Informal)
[What changed? Investigating Debiasing Methods using Causal Mediation Analysis](https://aclanthology.org/2022.gebnlp-1.26) (Jeoung & Diesner, GeBNLP 2022)
ACL