@inproceedings{ivetta-etal-2025-insights,
title = "Insights from a Disaggregated Analysis of Kinds of Biases in a Multicultural Dataset",
author = "Ivetta, Guido and
Maina, Hern{\'a}n and
Benotti, Luciana",
editor = "Zhang, Chen and
Allaway, Emily and
Shen, Hua and
Miculicich, Lesly and
Li, Yinqiao and
M'hamdi, Meryem and
Limkonchotiwat, Peerat and
Bai, Richard He and
T.y.s.s., Santosh and
Han, Sophia Simeng and
Thapa, Surendrabikram and
Rim, Wiem Ben",
booktitle = "Proceedings of the 9th Widening NLP Workshop",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.winlp-main.20/",
pages = "116--122",
ISBN = "979-8-89176-351-7",
    abstract = "Warning: This paper contains explicit statements of offensive stereotypes which may be upsetting. Stereotypes vary across cultural contexts, making it essential to understand how language models encode social biases. MultiLingualCrowsPairs is a dataset of culturally adapted stereotypical and anti-stereotypical sentence pairs across nine languages. While prior work has primarily reported average fairness metrics on masked language models, this paper analyzes social biases in generative models by disaggregating results across specific bias types. We find that although most languages show an overall preference for stereotypical sentences, this masks substantial variation across different types of bias, such as gender, religion, and socioeconomic status. Our findings underscore that relying solely on aggregated metrics can obscure important patterns, and that fine-grained, bias-specific analysis is critical for meaningful fairness evaluation."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ivetta-etal-2025-insights">
<titleInfo>
<title>Insights from a Disaggregated Analysis of Kinds of Biases in a Multicultural Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Guido</namePart>
<namePart type="family">Ivetta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hernán</namePart>
<namePart type="family">Maina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luciana</namePart>
<namePart type="family">Benotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th Widening NLP Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chen</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Allaway</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hua</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lesly</namePart>
<namePart type="family">Miculicich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yinqiao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meryem</namePart>
<namePart type="family">M’hamdi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peerat</namePart>
<namePart type="family">Limkonchotiwat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="given">He</namePart>
<namePart type="family">Bai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Santosh</namePart>
<namePart type="family">T.y.s.s.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="given">Simeng</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surendrabikram</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wiem</namePart>
<namePart type="given">Ben</namePart>
<namePart type="family">Rim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-351-7</identifier>
</relatedItem>
<abstract>Warning: This paper contains explicit statements of offensive stereotypes which may be upsetting. Stereotypes vary across cultural contexts, making it essential to understand how language models encode social biases. MultiLingualCrowsPairs is a dataset of culturally adapted stereotypical and anti-stereotypical sentence pairs across nine languages. While prior work has primarily reported average fairness metrics on masked language models, this paper analyzes social biases in generative models by disaggregating results across specific bias types. We find that although most languages show an overall preference for stereotypical sentences, this masks substantial variation across different types of bias, such as gender, religion, and socioeconomic status. Our findings underscore that relying solely on aggregated metrics can obscure important patterns, and that fine-grained, bias-specific analysis is critical for meaningful fairness evaluation.</abstract>
<identifier type="citekey">ivetta-etal-2025-insights</identifier>
<location>
<url>https://aclanthology.org/2025.winlp-main.20/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>116</start>
<end>122</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Insights from a Disaggregated Analysis of Kinds of Biases in a Multicultural Dataset
%A Ivetta, Guido
%A Maina, Hernán
%A Benotti, Luciana
%Y Zhang, Chen
%Y Allaway, Emily
%Y Shen, Hua
%Y Miculicich, Lesly
%Y Li, Yinqiao
%Y M’hamdi, Meryem
%Y Limkonchotiwat, Peerat
%Y Bai, Richard He
%Y T.y.s.s., Santosh
%Y Han, Sophia Simeng
%Y Thapa, Surendrabikram
%Y Rim, Wiem Ben
%S Proceedings of the 9th Widening NLP Workshop
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-351-7
%F ivetta-etal-2025-insights
%X Warning: This paper contains explicit statements of offensive stereotypes which may be upsetting. Stereotypes vary across cultural contexts, making it essential to understand how language models encode social biases. MultiLingualCrowsPairs is a dataset of culturally adapted stereotypical and anti-stereotypical sentence pairs across nine languages. While prior work has primarily reported average fairness metrics on masked language models, this paper analyzes social biases in generative models by disaggregating results across specific bias types. We find that although most languages show an overall preference for stereotypical sentences, this masks substantial variation across different types of bias, such as gender, religion, and socioeconomic status. Our findings underscore that relying solely on aggregated metrics can obscure important patterns, and that fine-grained, bias-specific analysis is critical for meaningful fairness evaluation.
%U https://aclanthology.org/2025.winlp-main.20/
%P 116-122
Markdown (Informal)
[Insights from a Disaggregated Analysis of Kinds of Biases in a Multicultural Dataset](https://aclanthology.org/2025.winlp-main.20/) (Ivetta et al., WiNLP 2025)
ACL
Guido Ivetta, Hernán Maina, and Luciana Benotti. 2025. [Insights from a Disaggregated Analysis of Kinds of Biases in a Multicultural Dataset](https://aclanthology.org/2025.winlp-main.20/). In *Proceedings of the 9th Widening NLP Workshop*, pages 116–122, Suzhou, China. Association for Computational Linguistics.