@inproceedings{zhou-etal-2023-causal,
title = "Causal-Debias: Unifying Debiasing in Pretrained Language Models and Fine-tuning via Causal Invariant Learning",
author = "Zhou, Fan and
Mao, Yuzhou and
Yu, Liu and
Yang, Yi and
Zhong, Ting",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-long.232",
doi = "10.18653/v1/2023.acl-long.232",
pages = "4227--4241",
abstract = "Demographic biases and social stereotypes are common in pretrained language models (PLMs), and a burgeoning body of literature focuses on removing the unwanted stereotypical associations from PLMs. However, when fine-tuning these bias-mitigated PLMs in downstream natural language processing (NLP) applications, such as sentiment classification, the unwanted stereotypical associations resurface or even get amplified. Since pretrain{\&}fine-tune is a major paradigm in NLP applications, separating the debiasing procedure of PLMs from fine-tuning would eventually harm the actual downstream utility. In this paper, we propose a unified debiasing framework Causal-Debias to remove unwanted stereotypical associations in PLMs during fine-tuning. Specifically, CausalDebias mitigates bias from a causal invariant perspective by leveraging the specific downstream task to identify bias-relevant and labelrelevant factors. We propose that bias-relevant factors are non-causal as they should have little impact on downstream tasks, while labelrelevant factors are causal. We perform interventions on non-causal factors in different demographic groups and design an invariant risk minimization loss to mitigate bias while maintaining task performance. Experimental results on three downstream tasks show that our proposed method can remarkably reduce unwanted stereotypical associations after PLMs are finetuned, while simultaneously minimizing the impact on PLMs and downstream applications.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhou-etal-2023-causal">
<titleInfo>
<title>Causal-Debias: Unifying Debiasing in Pretrained Language Models and Fine-tuning via Causal Invariant Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Fan</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuzhou</namePart>
<namePart type="family">Mao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liu</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ting</namePart>
<namePart type="family">Zhong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Demographic biases and social stereotypes are common in pretrained language models (PLMs), and a burgeoning body of literature focuses on removing the unwanted stereotypical associations from PLMs. However, when fine-tuning these bias-mitigated PLMs in downstream natural language processing (NLP) applications, such as sentiment classification, the unwanted stereotypical associations resurface or even get amplified. Since pretrain&fine-tune is a major paradigm in NLP applications, separating the debiasing procedure of PLMs from fine-tuning would eventually harm the actual downstream utility. In this paper, we propose a unified debiasing framework Causal-Debias to remove unwanted stereotypical associations in PLMs during fine-tuning. Specifically, CausalDebias mitigates bias from a causal invariant perspective by leveraging the specific downstream task to identify bias-relevant and labelrelevant factors. We propose that bias-relevant factors are non-causal as they should have little impact on downstream tasks, while labelrelevant factors are causal. We perform interventions on non-causal factors in different demographic groups and design an invariant risk minimization loss to mitigate bias while maintaining task performance. Experimental results on three downstream tasks show that our proposed method can remarkably reduce unwanted stereotypical associations after PLMs are finetuned, while simultaneously minimizing the impact on PLMs and downstream applications.</abstract>
<identifier type="citekey">zhou-etal-2023-causal</identifier>
<identifier type="doi">10.18653/v1/2023.acl-long.232</identifier>
<location>
<url>https://aclanthology.org/2023.acl-long.232</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>4227</start>
<end>4241</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Causal-Debias: Unifying Debiasing in Pretrained Language Models and Fine-tuning via Causal Invariant Learning
%A Zhou, Fan
%A Mao, Yuzhou
%A Yu, Liu
%A Yang, Yi
%A Zhong, Ting
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F zhou-etal-2023-causal
%X Demographic biases and social stereotypes are common in pretrained language models (PLMs), and a burgeoning body of literature focuses on removing the unwanted stereotypical associations from PLMs. However, when fine-tuning these bias-mitigated PLMs in downstream natural language processing (NLP) applications, such as sentiment classification, the unwanted stereotypical associations resurface or even get amplified. Since pretrain&fine-tune is a major paradigm in NLP applications, separating the debiasing procedure of PLMs from fine-tuning would eventually harm the actual downstream utility. In this paper, we propose a unified debiasing framework Causal-Debias to remove unwanted stereotypical associations in PLMs during fine-tuning. Specifically, CausalDebias mitigates bias from a causal invariant perspective by leveraging the specific downstream task to identify bias-relevant and labelrelevant factors. We propose that bias-relevant factors are non-causal as they should have little impact on downstream tasks, while labelrelevant factors are causal. We perform interventions on non-causal factors in different demographic groups and design an invariant risk minimization loss to mitigate bias while maintaining task performance. Experimental results on three downstream tasks show that our proposed method can remarkably reduce unwanted stereotypical associations after PLMs are finetuned, while simultaneously minimizing the impact on PLMs and downstream applications.
%R 10.18653/v1/2023.acl-long.232
%U https://aclanthology.org/2023.acl-long.232
%U https://doi.org/10.18653/v1/2023.acl-long.232
%P 4227-4241
Markdown (Informal)
[Causal-Debias: Unifying Debiasing in Pretrained Language Models and Fine-tuning via Causal Invariant Learning](https://aclanthology.org/2023.acl-long.232) (Zhou et al., ACL 2023)
ACL