@inproceedings{zhao-etal-2025-unmasking,
title = "Unmasking Style Sensitivity: A Causal Analysis of Bias Evaluation Instability in Large Language Models",
author = "Zhao, Jiaxu and
Fang, Meng and
Zhang, Kun and
Pechenizkiy, Mykola",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.796/",
doi = "10.18653/v1/2025.acl-long.796",
pages = "16314--16338",
ISBN = "979-8-89176-251-0",
abstract = "Natural language processing applications are increasingly prevalent, but social biases in their outputs remain a critical challenge. While various bias evaluation methods have been proposed, these assessments show unexpected instability when input texts undergo minor stylistic changes. This paper conducts a comprehensive analysis of how different style transformations impact bias evaluation results across multiple language models and bias types using causal inference techniques. Our findings reveal that formality transformations significantly affect bias scores, with informal style showing substantial bias reductions (up to 8.33{\%} in LLaMA-2-13B). We identify appearance bias, sexual orientation bias, and religious bias as most susceptible to style changes, with variations exceeding 20{\%}. Larger models demonstrate greater sensitivity to stylistic variations, with bias measurements fluctuating up to 3.1{\%} more than in smaller models. These results highlight critical limitations in current bias evaluation methods and emphasize the need for reliable and fair assessments of language models."
}