@comment{The second author uses the "Last, Jr., First" name form so that the suffix "Jr." is parsed as a suffix rather than as the surname.}
@inproceedings{perez-etal-2026-simulating,
    title = "Simulating Social Attitudes with {LLM}s: Accuracy, Demographic Effects, and Refusal Behavior in the Sensitive Domain of Suicide Prevention",
    author = "Perez, Cristina J. and
      Vasquez, Jr., Michael P. and
      Giabbanelli, Philippe and
      Wu, Patrick Y.",
    editor = "Card, Dallas and
      Field, Anjalie and
      Keith, Katherine and
      Mendelsohn, Julia",
    booktitle = "Proceedings of the Seventh Workshop on Natural Language Processing and Computational Social Science",
    month = jul,
    year = "2026",
    address = "San Diego",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.nlpcss-1.12/",
    pages = "176--189",
    isbn = "979-8-89176-426-2",
    abstract = "Large language models (LLMs) are increasingly used to simulate public opinion, yet their validity in sensitive policy domains remains underexplored. We evaluate whether LLMs can reproduce attitudes toward suicide prevention policies using 32 questions drawn from seven nationally representative U.S. surveys (2023-2025). We systematically vary demographic conditioning (race/ethnicity, gender, age, education, income, party), prompt framing (direct elicitation, respondent embodiment, specialist embodiment), and model architecture (GPT-5 Nano, DeepSeek V3.2, Meta Llama 3.1 8B, Mistral Small 24B). Across 811,560 prompts, the mean absolute error{---}the average gap between predicted and human response distributions{---}is 23 percentage points. We also find that LLM responses to demographic-conditioned prompts diverge substantially from prompts without demographic information. In short, what distribution LLMs draw on when generating responses to sensitive polling questions remains unclear. Model choice matters more than framing for accuracy, whereas refusal behavior varies sharply across models and prompt designs. Our findings highlight the limitations of LLMs for social simulation in the context of sensitive topics."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the workshop paper; the host proceedings volume is described in relatedItem[@type="host"]. -->
  <mods ID="perez-etal-2026-simulating">
    <titleInfo>
      <title>Simulating Social Attitudes with LLMs: Accuracy, Demographic Effects, and Refusal Behavior in the Sensitive Domain of Suicide Prevention</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Cristina</namePart>
      <namePart type="given">J</namePart>
      <namePart type="family">Perez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Michael</namePart>
      <namePart type="given">P</namePart>
      <namePart type="family">Vasquez</namePart>
      <!-- Generational suffix is carried as termsOfAddress, not as the family name. -->
      <namePart type="termsOfAddress">Jr.</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Philippe</namePart>
      <namePart type="family">Giabbanelli</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Patrick</namePart>
      <namePart type="given">Y</namePart>
      <namePart type="family">Wu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Seventh Workshop on Natural Language Processing and Computational Social Science</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Dallas</namePart>
        <namePart type="family">Card</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anjalie</namePart>
        <namePart type="family">Field</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Katherine</namePart>
        <namePart type="family">Keith</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Julia</namePart>
        <namePart type="family">Mendelsohn</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-426-2</identifier>
    </relatedItem>
    <abstract>Large language models (LLMs) are increasingly used to simulate public opinion, yet their validity in sensitive policy domains remains underexplored. We evaluate whether LLMs can reproduce attitudes toward suicide prevention policies using 32 questions drawn from seven nationally representative U.S. surveys (2023-2025). We systematically vary demographic conditioning (race/ethnicity, gender, age, education, income, party), prompt framing (direct elicitation, respondent embodiment, specialist embodiment), and model architecture (GPT-5 Nano, DeepSeek V3.2, Meta Llama 3.1 8B, Mistral Small 24B). Across 811,560 prompts, the mean absolute error—the average gap between predicted and human response distributions—is 23 percentage points. We also find that LLM responses to demographic-conditioned prompts diverge substantially from prompts without demographic information. In short, what distribution LLMs draw on when generating responses to sensitive polling questions remains unclear. Model choice matters more than framing for accuracy, whereas refusal behavior varies sharply across models and prompt designs. Our findings highlight the limitations of LLMs for social simulation in the context of sensitive topics.</abstract>
    <identifier type="citekey">perez-etal-2026-simulating</identifier>
    <location>
      <url>https://aclanthology.org/2026.nlpcss-1.12/</url>
    </location>
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>176</start>
        <end>189</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Simulating Social Attitudes with LLMs: Accuracy, Demographic Effects, and Refusal Behavior in the Sensitive Domain of Suicide Prevention
%A Perez, Cristina J.
%A Vasquez, Michael P., Jr.
%A Giabbanelli, Philippe
%A Wu, Patrick Y.
%Y Card, Dallas
%Y Field, Anjalie
%Y Keith, Katherine
%Y Mendelsohn, Julia
%S Proceedings of the Seventh Workshop on Natural Language Processing and Computational Social Science
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego
%@ 979-8-89176-426-2
%F perez-etal-2026-simulating
%X Large language models (LLMs) are increasingly used to simulate public opinion, yet their validity in sensitive policy domains remains underexplored. We evaluate whether LLMs can reproduce attitudes toward suicide prevention policies using 32 questions drawn from seven nationally representative U.S. surveys (2023-2025). We systematically vary demographic conditioning (race/ethnicity, gender, age, education, income, party), prompt framing (direct elicitation, respondent embodiment, specialist embodiment), and model architecture (GPT-5 Nano, DeepSeek V3.2, Meta Llama 3.1 8B, Mistral Small 24B). Across 811,560 prompts, the mean absolute error—the average gap between predicted and human response distributions—is 23 percentage points. We also find that LLM responses to demographic-conditioned prompts diverge substantially from prompts without demographic information. In short, what distribution LLMs draw on when generating responses to sensitive polling questions remains unclear. Model choice matters more than framing for accuracy, whereas refusal behavior varies sharply across models and prompt designs. Our findings highlight the limitations of LLMs for social simulation in the context of sensitive topics.
%U https://aclanthology.org/2026.nlpcss-1.12/
%P 176-189
Markdown (Informal)
[Simulating Social Attitudes with LLMs: Accuracy, Demographic Effects, and Refusal Behavior in the Sensitive Domain of Suicide Prevention](https://aclanthology.org/2026.nlpcss-1.12/) (Perez et al., NLP+CSS 2026)
ACL