@inproceedings{asad-etal-2025-beautiful,
title = "``You are Beautiful, Body Image Stereotypes are Ugly!'' {BIS}tereo: A Benchmark to Measure Body Image Stereotypes in Language Models",
author = "Asad, Narjis and
Sahoo, Nihar Ranjan and
Murthy, Rudra and
Nath, Swaprava and
Bhattacharyya, Pushpak",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.1257/",
doi = "10.18653/v1/2025.findings-acl.1257",
pages = "24471--24496",
ISBN = "979-8-89176-256-5",
abstract = "While a few high-quality bias benchmark datasets exist to address stereotypes in Language Models (LMs), a notable lack of focus remains on body image stereotypes. To bridge this gap, we propose $\textbf{BIStereo}$, a suite to uncover LMs' biases towards people of certain physical appearance characteristics, namely, $\textit{skin complexion, body shape, height, attire,}$ and a $\textit{miscellaneous category}$ including $\textit{hair texture, eye color, and more}$. Our dataset comprises 40k sentence pairs designed to assess LMs' biased preference for certain body types. We further include 60k premise-hypothesis pairs designed to comprehensively assess LMs' preference for fair skin tone. Additionally, we curate 553 tuples consisting of a $\textit{body image descriptor, gender, and a stereotypical attribute}$, validated by a diverse pool of annotators for physical appearance stereotypes.We propose a metric, $\textbf{TriSentBias}$, that captures the biased preferences of LMs towards a certain body type over others. Using $\textbf{BIStereo}$, we assess the presence of body image biases in ten different language models, revealing significant biases in models Muril, XLMR, Llama3, and Gemma. We further evaluate the LMs through downstream NLI and Analogy tasks.Our NLI experiments highlight notable patterns in the LMs that align with the well-documented cognitive bias in humans known as $\textbf{\textit{the Halo Effect}}$."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="asad-etal-2025-beautiful">
<titleInfo>
<title>“You are Beautiful, Body Image Stereotypes are Ugly!” BIStereo: A Benchmark to Measure Body Image Stereotypes in Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Narjis</namePart>
<namePart type="family">Asad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nihar</namePart>
<namePart type="given">Ranjan</namePart>
<namePart type="family">Sahoo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rudra</namePart>
<namePart type="family">Murthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Swaprava</namePart>
<namePart type="family">Nath</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>While a few high-quality bias benchmark datasets exist to address stereotypes in Language Models (LMs), a notable lack of focus remains on body image stereotypes. To bridge this gap, we propose BIStereo, a suite to uncover LMs’ biases towards people of certain physical appearance characteristics, namely, skin complexion, body shape, height, attire, and a miscellaneous category including hair texture, eye color, and more. Our dataset comprises 40k sentence pairs designed to assess LMs’ biased preference for certain body types. We further include 60k premise-hypothesis pairs designed to comprehensively assess LMs’ preference for fair skin tone. Additionally, we curate 553 tuples consisting of a body image descriptor, gender, and a stereotypical attribute, validated by a diverse pool of annotators for physical appearance stereotypes.We propose a metric, TriSentBias, that captures the biased preferences of LMs towards a certain body type over others. Using BIStereo, we assess the presence of body image biases in ten different language models, revealing significant biases in models Muril, XLMR, Llama3, and Gemma. We further evaluate the LMs through downstream NLI and Analogy tasks.Our NLI experiments highlight notable patterns in the LMs that align with the well-documented cognitive bias in humans known as the Halo Effect.</abstract>
<identifier type="citekey">asad-etal-2025-beautiful</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.1257</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.1257/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>24471</start>
<end>24496</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T “You are Beautiful, Body Image Stereotypes are Ugly!” BIStereo: A Benchmark to Measure Body Image Stereotypes in Language Models
%A Asad, Narjis
%A Sahoo, Nihar Ranjan
%A Murthy, Rudra
%A Nath, Swaprava
%A Bhattacharyya, Pushpak
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F asad-etal-2025-beautiful
%X While a few high-quality bias benchmark datasets exist to address stereotypes in Language Models (LMs), a notable lack of focus remains on body image stereotypes. To bridge this gap, we propose BIStereo, a suite to uncover LMs’ biases towards people of certain physical appearance characteristics, namely, skin complexion, body shape, height, attire, and a miscellaneous category including hair texture, eye color, and more. Our dataset comprises 40k sentence pairs designed to assess LMs’ biased preference for certain body types. We further include 60k premise-hypothesis pairs designed to comprehensively assess LMs’ preference for fair skin tone. Additionally, we curate 553 tuples consisting of a body image descriptor, gender, and a stereotypical attribute, validated by a diverse pool of annotators for physical appearance stereotypes. We propose a metric, TriSentBias, that captures the biased preferences of LMs towards a certain body type over others. Using BIStereo, we assess the presence of body image biases in ten different language models, revealing significant biases in models Muril, XLMR, Llama3, and Gemma. We further evaluate the LMs through downstream NLI and Analogy tasks. Our NLI experiments highlight notable patterns in the LMs that align with the well-documented cognitive bias in humans known as the Halo Effect.
%R 10.18653/v1/2025.findings-acl.1257
%U https://aclanthology.org/2025.findings-acl.1257/
%U https://doi.org/10.18653/v1/2025.findings-acl.1257
%P 24471-24496
Markdown (Informal)
[“You are Beautiful, Body Image Stereotypes are Ugly!” BIStereo: A Benchmark to Measure Body Image Stereotypes in Language Models](https://aclanthology.org/2025.findings-acl.1257/) (Asad et al., Findings 2025)
ACL
Narjis Asad, Nihar Ranjan Sahoo, Rudra Murthy, Swaprava Nath, and Pushpak Bhattacharyya. 2025. [“You are Beautiful, Body Image Stereotypes are Ugly!” BIStereo: A Benchmark to Measure Body Image Stereotypes in Language Models](https://aclanthology.org/2025.findings-acl.1257/). In *Findings of the Association for Computational Linguistics: ACL 2025*, pages 24471–24496, Vienna, Austria. Association for Computational Linguistics.