@inproceedings{hirota-etal-2024-resampled,
title = "Resampled Datasets Are Not Enough: Mitigating Societal Bias Beyond Single Attributes",
author = "Hirota, Yusuke and
Andrews, Jerone and
Zhao, Dora and
Papakyriakopoulos, Orestis and
Modas, Apostolos and
Nakashima, Yuta and
Xiang, Alice",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.471",
pages = "8249--8267",
abstract = "We tackle societal bias in image-text datasets by removing spurious correlations between protected groups and image attributes. Traditional methods only target labeled attributes, ignoring biases from unlabeled ones. Using text-guided inpainting models, our approach ensures protected group independence from all attributes and mitigates inpainting biases through data filtering. Evaluations on multi-label image classification and image captioning tasks show our method effectively reduces bias without compromising performance across various models. Specifically, we achieve an average societal bias reduction of 46.1{\%} in leakage-based bias metrics for multi-label classification and 74.8{\%} for image captioning.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hirota-etal-2024-resampled">
<titleInfo>
<title>Resampled Datasets Are Not Enough: Mitigating Societal Bias Beyond Single Attributes</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Hirota</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jerone</namePart>
<namePart type="family">Andrews</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dora</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Orestis</namePart>
<namePart type="family">Papakyriakopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Apostolos</namePart>
<namePart type="family">Modas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuta</namePart>
<namePart type="family">Nakashima</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alice</namePart>
<namePart type="family">Xiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We tackle societal bias in image-text datasets by removing spurious correlations between protected groups and image attributes. Traditional methods only target labeled attributes, ignoring biases from unlabeled ones. Using text-guided inpainting models, our approach ensures protected group independence from all attributes and mitigates inpainting biases through data filtering. Evaluations on multi-label image classification and image captioning tasks show our method effectively reduces bias without compromising performance across various models. Specifically, we achieve an average societal bias reduction of 46.1% in leakage-based bias metrics for multi-label classification and 74.8% for image captioning.</abstract>
<identifier type="citekey">hirota-etal-2024-resampled</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.471</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>8249</start>
<end>8267</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Resampled Datasets Are Not Enough: Mitigating Societal Bias Beyond Single Attributes
%A Hirota, Yusuke
%A Andrews, Jerone
%A Zhao, Dora
%A Papakyriakopoulos, Orestis
%A Modas, Apostolos
%A Nakashima, Yuta
%A Xiang, Alice
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F hirota-etal-2024-resampled
%X We tackle societal bias in image-text datasets by removing spurious correlations between protected groups and image attributes. Traditional methods only target labeled attributes, ignoring biases from unlabeled ones. Using text-guided inpainting models, our approach ensures protected group independence from all attributes and mitigates inpainting biases through data filtering. Evaluations on multi-label image classification and image captioning tasks show our method effectively reduces bias without compromising performance across various models. Specifically, we achieve an average societal bias reduction of 46.1% in leakage-based bias metrics for multi-label classification and 74.8% for image captioning.
%U https://aclanthology.org/2024.emnlp-main.471
%P 8249-8267
Markdown (Informal)
[Resampled Datasets Are Not Enough: Mitigating Societal Bias Beyond Single Attributes](https://aclanthology.org/2024.emnlp-main.471) (Hirota et al., EMNLP 2024)
ACL
- Yusuke Hirota, Jerone Andrews, Dora Zhao, Orestis Papakyriakopoulos, Apostolos Modas, Yuta Nakashima, and Alice Xiang. 2024. Resampled Datasets Are Not Enough: Mitigating Societal Bias Beyond Single Attributes. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 8249–8267, Miami, Florida, USA. Association for Computational Linguistics.