% QueerGen paper (Sosto, Pandiani, Hollink), Findings of the ACL: EACL 2026, pages 4305-4326.
% Case-sensitive words are brace-protected as whole words so sentence-casing styles keep their capitals.
@inproceedings{sosto-etal-2026-queergen,
  title     = {{QueerGen}: How {LLM}s Reflect Societal Norms on Gender and Sexuality in Sentence Completion Task},
  author    = {Sosto, Mae and
               Pandiani, Delfina S. Martinez and
               Hollink, Laura},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'i}s},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {EACL} 2026},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-eacl.225/},
  pages     = {4305--4326},
  isbn      = {979-8-89176-386-9},
  abstract  = {This paper examines how Large Language Models (LLMs) reproduce societal norms, particularly heterocisnormativity, and how these norms translate into measurable biases in their text generations. We investigate whether explicit information about a subject{'}s gender or sexuality influences LLM responses across three subject categories: queer-marked, non-queer-marked, and the normalized ``unmarked'' category. Representational imbalances are operationalized as measurable differences in English sentence completions across four dimensions: sentiment, regard, toxicity, and prediction diversity. Our findings show that Masked Language Models (MLMs) produce the least favorable sentiment, higher toxicity, and more negative regard for queer-marked subjects. Autoregressive Language Models (ARLMs) partially mitigate these patterns, while closed-access ARLMs tend to produce more harmful outputs for unmarked subjects. Results suggest that LLMs reproduce normative social assumptions, though the form and degree of bias depend strongly on specific model characteristics, which may redistribute{---}but not eliminate{---}representational harms.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for citekey sosto-etal-2026-queergen -->
  <mods ID="sosto-etal-2026-queergen">
    <titleInfo>
      <title>QueerGen: How LLMs Reflect Societal Norms on Gender and Sexuality in Sentence Completion Task</title>
    </titleInfo>

    <!-- Authors, in citation order -->
    <name type="personal">
      <namePart type="given">Mae</namePart>
      <namePart type="family">Sosto</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Delfina</namePart>
      <namePart type="given">S</namePart>
      <namePart type="given">Martinez</namePart>
      <namePart type="family">Pandiani</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Laura</namePart>
      <namePart type="family">Hollink</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host publication: proceedings title, editors, publisher, place, ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Vera</namePart>
        <namePart type="family">Demberg</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kentaro</namePart>
        <namePart type="family">Inui</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lluís</namePart>
        <namePart type="family">Marquez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-386-9</identifier>
    </relatedItem>

    <abstract>This paper examines how Large Language Models (LLMs) reproduce societal norms, particularly heterocisnormativity, and how these norms translate into measurable biases in their text generations. We investigate whether explicit information about a subject’s gender or sexuality influences LLM responses across three subject categories: queer-marked, non-queer-marked, and the normalized “unmarked” category. Representational imbalances are operationalized as measurable differences in English sentence completions across four dimensions: sentiment, regard, toxicity, and prediction diversity. Our findings show that Masked Language Models (MLMs) produce the least favorable sentiment, higher toxicity, and more negative regard for queer-marked subjects. Autoregressive Language Models (ARLMs) partially mitigate these patterns, while closed-access ARLMs tend to produce more harmful outputs for unmarked subjects. Results suggest that LLMs reproduce normative social assumptions, though the form and degree of bias depend strongly on specific model characteristics, which may redistribute—but not eliminate—representational harms.</abstract>
    <identifier type="citekey">sosto-etal-2026-queergen</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-eacl.225/</url>
    </location>

    <!-- Issue date and page range of this paper within the host volume -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>4305</start>
        <end>4326</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T QueerGen: How LLMs Reflect Societal Norms on Gender and Sexuality in Sentence Completion Task
%A Sosto, Mae
%A Pandiani, Delfina S. Martinez
%A Hollink, Laura
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Findings of the Association for Computational Linguistics: EACL 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-386-9
%F sosto-etal-2026-queergen
%X This paper examines how Large Language Models (LLMs) reproduce societal norms, particularly heterocisnormativity, and how these norms translate into measurable biases in their text generations. We investigate whether explicit information about a subject’s gender or sexuality influences LLM responses across three subject categories: queer-marked, non-queer-marked, and the normalized “unmarked” category. Representational imbalances are operationalized as measurable differences in English sentence completions across four dimensions: sentiment, regard, toxicity, and prediction diversity. Our findings show that Masked Language Models (MLMs) produce the least favorable sentiment, higher toxicity, and more negative regard for queer-marked subjects. Autoregressive Language Models (ARLMs) partially mitigate these patterns, while closed-access ARLMs tend to produce more harmful outputs for unmarked subjects. Results suggest that LLMs reproduce normative social assumptions, though the form and degree of bias depend strongly on specific model characteristics, which may redistribute—but not eliminate—representational harms.
%U https://aclanthology.org/2026.findings-eacl.225/
%P 4305-4326
Markdown (Informal)
[QueerGen: How LLMs Reflect Societal Norms on Gender and Sexuality in Sentence Completion Task](https://aclanthology.org/2026.findings-eacl.225/) (Sosto et al., Findings 2026)
ACL