% BibTeX record for the paper at https://aclanthology.org/2025.gebnlp-1.5/
@inproceedings{bladsjo-munoz-sanchez-2025-introducing,
  title     = {Introducing {MARB} {---} A Dataset for Studying the Social Dimensions of Reporting Bias in Language Models},
  author    = {S{\"o}dahl Bladsj{\"o}, Tom and
               Mu{\~n}oz S{\'a}nchez, Ricardo},
  editor    = {Fale{\'n}ska, Agnieszka and
               Basta, Christine and
               Costa-juss{\`a}, Marta and
               Sta{\'n}czak, Karolina and
               Nozza, Debora},
  booktitle = {Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP)},
  month     = aug,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.gebnlp-1.5/},
  doi       = {10.18653/v1/2025.gebnlp-1.5},
  pages     = {59--74},
  isbn      = {979-8-89176-277-0},
  abstract  = {Reporting bias is the tendency for speakers to omit unnecessary or obvious information while mentioning things they consider relevant or surprising. In descriptions of people, reporting bias can manifest as a tendency to over report on attributes that deviate from the norm. While social bias in language models has garnered a lot of attention in recent years, a majority of the existing work equates ``bias'' with ``stereotypes''. We suggest reporting bias as an alternative lens through which to study how social attitudes manifest in language models. We present the MARB dataset, a diagnostic dataset for studying the interaction between social bias and reporting bias in language models. We use MARB to evaluate the off-the-shelf behavior of both masked and autoregressive language models and find signs of reporting bias with regards to marginalized identities, mirroring that which can be found in human text. This effect is particularly pronounced when taking gender into account, demonstrating the importance of considering intersectionality when studying social phenomena like biases.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record: one paper, with its host proceedings (editors, publisher, ISBN) nested under relatedItem. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="bladsjo-munoz-sanchez-2025-introducing">
    <!-- Paper title and authors -->
    <titleInfo>
      <title>Introducing MARB — A Dataset for Studying the Social Dimensions of Reporting Bias in Language Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Tom</namePart>
      <namePart type="family">Södahl Bladsjö</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ricardo</namePart>
      <namePart type="family">Muñoz Sánchez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume and its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Agnieszka</namePart>
        <namePart type="family">Faleńska</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christine</namePart>
        <namePart type="family">Basta</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marta</namePart>
        <namePart type="family">Costa-jussà</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Karolina</namePart>
        <namePart type="family">Stańczak</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Debora</namePart>
        <namePart type="family">Nozza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-277-0</identifier>
    </relatedItem>
    <abstract>Reporting bias is the tendency for speakers to omit unnecessary or obvious information while mentioning things they consider relevant or surprising. In descriptions of people, reporting bias can manifest as a tendency to over report on attributes that deviate from the norm. While social bias in language models has garnered a lot of attention in recent years, a majority of the existing work equates “bias” with “stereotypes”. We suggest reporting bias as an alternative lens through which to study how social attitudes manifest in language models. We present the MARB dataset, a diagnostic dataset for studying the interaction between social bias and reporting bias in language models. We use MARB to evaluate the off-the-shelf behavior of both masked and autoregressive language models and find signs of reporting bias with regards to marginalized identities, mirroring that which can be found in human text. This effect is particularly pronounced when taking gender into account, demonstrating the importance of considering intersectionality when studying social phenomena like biases.</abstract>
    <!-- Identifiers and location of the paper itself -->
    <identifier type="citekey">bladsjo-munoz-sanchez-2025-introducing</identifier>
    <identifier type="doi">10.18653/v1/2025.gebnlp-1.5</identifier>
    <location>
      <url>https://aclanthology.org/2025.gebnlp-1.5/</url>
    </location>
    <part>
      <date>2025-08</date>
      <extent unit="page">
        <start>59</start>
        <end>74</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Introducing MARB — A Dataset for Studying the Social Dimensions of Reporting Bias in Language Models
%A Södahl Bladsjö, Tom
%A Muñoz Sánchez, Ricardo
%Y Faleńska, Agnieszka
%Y Basta, Christine
%Y Costa-jussà, Marta
%Y Stańczak, Karolina
%Y Nozza, Debora
%S Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-277-0
%F bladsjo-munoz-sanchez-2025-introducing
%X Reporting bias is the tendency for speakers to omit unnecessary or obvious information while mentioning things they consider relevant or surprising. In descriptions of people, reporting bias can manifest as a tendency to over report on attributes that deviate from the norm. While social bias in language models has garnered a lot of attention in recent years, a majority of the existing work equates “bias” with “stereotypes”. We suggest reporting bias as an alternative lens through which to study how social attitudes manifest in language models. We present the MARB dataset, a diagnostic dataset for studying the interaction between social bias and reporting bias in language models. We use MARB to evaluate the off-the-shelf behavior of both masked and autoregressive language models and find signs of reporting bias with regards to marginalized identities, mirroring that which can be found in human text. This effect is particularly pronounced when taking gender into account, demonstrating the importance of considering intersectionality when studying social phenomena like biases.
%R 10.18653/v1/2025.gebnlp-1.5
%U https://aclanthology.org/2025.gebnlp-1.5/
%U https://doi.org/10.18653/v1/2025.gebnlp-1.5
%P 59-74
Markdown (Informal)
[Introducing MARB — A Dataset for Studying the Social Dimensions of Reporting Bias in Language Models](https://aclanthology.org/2025.gebnlp-1.5/) (Södahl Bladsjö & Muñoz Sánchez, GeBNLP 2025)
ACL