@inproceedings{sae-lim-etal-2024-identifying,
title = "Identifying and Mitigating Annotation Bias in Natural Language Understanding using Causal Mediation Analysis",
author = "Sae Lim, Sitiporn and
Udomcharoenchaikit, Can and
Limkonchotiwat, Peerat and
Chuangsuwanich, Ekapol and
Nutanong, Sarana",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-acl.686/",
doi = "10.18653/v1/2024.findings-acl.686",
pages = "11548--11563",
abstract = "NLU models have achieved promising results on standard benchmarks. Despite state-of-the-art accuracy, analysis reveals that many models make predictions using annotation bias rather than the properties we intend the model to learn. Consequently, these models perform poorly on out-of-distribution datasets. Recent advances in bias mitigation show that annotation bias can be alleviated through fine-tuning debiasing objectives. In this paper, we apply causal mediation analysis to gauge how much each model component mediates annotation biases. Using the knowledge from the causal analysis, we improve the model`s robustness against annotation bias through two bias mitigation methods: causal-grounded masking and gradient unlearning. Causal analysis reveals that biases concentrated in specific components, even after employing other training-time debiasing techniques. Manipulating these components by masking out neurons' activations or updating specific weight blocks both demonstrably improve robustness against annotation artifacts."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sae-lim-etal-2024-identifying">
<titleInfo>
<title>Identifying and Mitigating Annotation Bias in Natural Language Understanding using Causal Mediation Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sitiporn</namePart>
<namePart type="family">Sae Lim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Can</namePart>
<namePart type="family">Udomcharoenchaikit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peerat</namePart>
<namePart type="family">Limkonchotiwat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekapol</namePart>
<namePart type="family">Chuangsuwanich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarana</namePart>
<namePart type="family">Nutanong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
    <abstract>NLU models have achieved promising results on standard benchmarks. Despite state-of-the-art accuracy, analysis reveals that many models make predictions using annotation bias rather than the properties we intend the model to learn. Consequently, these models perform poorly on out-of-distribution datasets. Recent advances in bias mitigation show that annotation bias can be alleviated through fine-tuning with debiasing objectives. In this paper, we apply causal mediation analysis to gauge how much each model component mediates annotation biases. Using the knowledge from the causal analysis, we improve the model's robustness against annotation bias through two bias mitigation methods: causal-grounded masking and gradient unlearning. Causal analysis reveals that biases are concentrated in specific components, even after employing other training-time debiasing techniques. Manipulating these components, whether by masking out neurons' activations or by updating specific weight blocks, demonstrably improves robustness against annotation artifacts.</abstract>
<identifier type="citekey">sae-lim-etal-2024-identifying</identifier>
<identifier type="doi">10.18653/v1/2024.findings-acl.686</identifier>
<location>
<url>https://aclanthology.org/2024.findings-acl.686/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>11548</start>
<end>11563</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Identifying and Mitigating Annotation Bias in Natural Language Understanding using Causal Mediation Analysis
%A Sae Lim, Sitiporn
%A Udomcharoenchaikit, Can
%A Limkonchotiwat, Peerat
%A Chuangsuwanich, Ekapol
%A Nutanong, Sarana
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F sae-lim-etal-2024-identifying
%X NLU models have achieved promising results on standard benchmarks. Despite state-of-the-art accuracy, analysis reveals that many models make predictions using annotation bias rather than the properties we intend the model to learn. Consequently, these models perform poorly on out-of-distribution datasets. Recent advances in bias mitigation show that annotation bias can be alleviated through fine-tuning with debiasing objectives. In this paper, we apply causal mediation analysis to gauge how much each model component mediates annotation biases. Using the knowledge from the causal analysis, we improve the model's robustness against annotation bias through two bias mitigation methods: causal-grounded masking and gradient unlearning. Causal analysis reveals that biases are concentrated in specific components, even after employing other training-time debiasing techniques. Manipulating these components, whether by masking out neurons' activations or by updating specific weight blocks, demonstrably improves robustness against annotation artifacts.
%R 10.18653/v1/2024.findings-acl.686
%U https://aclanthology.org/2024.findings-acl.686/
%U https://doi.org/10.18653/v1/2024.findings-acl.686
%P 11548-11563
Markdown (Informal)
[Identifying and Mitigating Annotation Bias in Natural Language Understanding using Causal Mediation Analysis](https://aclanthology.org/2024.findings-acl.686/) (Sae Lim et al., Findings 2024)
ACL
Sitiporn Sae Lim, Can Udomcharoenchaikit, Peerat Limkonchotiwat, Ekapol Chuangsuwanich, and Sarana Nutanong. 2024. [Identifying and Mitigating Annotation Bias in Natural Language Understanding using Causal Mediation Analysis](https://aclanthology.org/2024.findings-acl.686/). In *Findings of the Association for Computational Linguistics: ACL 2024*, pages 11548–11563, Bangkok, Thailand. Association for Computational Linguistics.
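
To make the two mitigation styles named in the abstract concrete, here is a minimal, hypothetical PyTorch sketch, not the authors' released code. It approximates causal-grounded masking by zeroing selected neuron activations with a forward hook, and gradient unlearning by confining weight updates to a flagged block. The model name, layer index, and neuron indices are placeholder assumptions; in the paper, the components to manipulate are identified via causal mediation analysis.

```python
# Hypothetical sketch, not the authors' implementation. Assumes a generic
# Hugging Face BERT classifier; the layer/neuron choices below are
# placeholders standing in for components flagged by causal mediation analysis.
import torch
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased", num_labels=3  # e.g., an NLI label set
)

# (1) Causal-grounded masking (approximated): zero out the activations of
# the flagged neurons in one FFN component via a forward hook.
biased_layer = 3                      # placeholder component index
biased_neurons = list(range(10, 20))  # placeholder neuron indices

def mask_flagged_neurons(module, inputs, output):
    # Clone first so autograd never sees an in-place edit of a saved tensor.
    output = output.clone()
    output[..., biased_neurons] = 0.0
    return output

hook = model.bert.encoder.layer[biased_layer].intermediate.register_forward_hook(
    mask_flagged_neurons
)

# (2) Gradient-based manipulation (approximated): update only the flagged
# weight block and freeze every other parameter before fine-tuning.
target_block = f"encoder.layer.{biased_layer}.intermediate"
for name, param in model.named_parameters():
    param.requires_grad = target_block in name

optimizer = torch.optim.AdamW(
    (p for p in model.parameters() if p.requires_grad), lr=2e-5
)

# hook.remove() restores the unmasked forward pass when no longer needed.
```

The hook-and-freeze mechanics above are only the manipulation step; the substantive part of the method is deciding which layers and neurons to flag, which the paper does by measuring how much each component mediates the annotation bias.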