@inproceedings{alipour-etal-2025-robustness,
title = "Robustness and Confounders in the Demographic Alignment of {LLM}s with Human Perceptions of Offensiveness",
author = "Alipour, Shayan and
Sen, Indira and
Samory, Mattia and
Mitra, Tanu",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.1136/",
doi = "10.18653/v1/2025.findings-acl.1136",
pages = "22025--22047",
isbn = "979-8-89176-256-5",
abstract = "Despite a growing literature finding that large language models (LLMs) exhibit demographic biases, reports with whom they align best are hard to generalize or even contradictory. In this work, we examine the alignment of LLMs with human annotations in five offensive language datasets, comprising approximately 220K annotations. While demographic traits, particularly race, influence alignment, these effects vary across datasets and are often entangled with other factors. Confounders introduced in the annotation process{---}such as document difficulty, annotator sensitivity, and within-group agreement{---}account for more variation in alignment patterns than demographic traits. Alignment increases with annotator sensitivity and group agreement, and decreases with document difficulty. Our results underscore the importance of multi-dataset analyses and confounder-aware methodologies in developing robust measures of demographic bias."
}