% Conference paper record (ACL 2026, Volume 1: Long Papers), pages 45983--46006.
@inproceedings{rahimzadeh-etal-2026-synthia,
  title     = {Synthia: Scalable Grounded Persona Generation from Social Media Data},
  author    = {Rahimzadeh, Vahid and
               Moosavi Monazzah, Erfan and
               Pilehvar, Mohammad Taher and
               Yaghoobzadeh, Yadollah},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-long.2134/},
  pages     = {45983--46006},
  isbn      = {979-8-89176-390-6},
  abstract  = {Persona-driven simulations are increasingly used in computational social science, yet their validity critically depends on the fidelity of the underlying personas. Constructing virtual populations that are both authentic and scalable remains a central challenge. We introduce Synthia, a persona-generation framework that grounds LLM-generated personas in real social-media posts while delegating narrative construction to language models, using publicly available data from the Bluesky platform. Across multiple social-survey benchmarks, Synthia improves alignment with human opinion distributions over prior state-of-the-art approaches while relying on substantially smaller models. A multi-dimensional fairness and bias analysis shows that Synthia outperforms previous methods for most demographics across different dimensions. Uniquely, Synthia preserves interaction-graph structure among personas grounded in real social network users, enabling network-aware analysis, which we demonstrate through two homophily-focused case studies. Together, these results position Synthia as a practical and reliable framework for constructing scalable, high-fidelity, and equitable virtual populations.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier below. -->
  <mods ID="rahimzadeh-etal-2026-synthia">
    <titleInfo>
      <title>Synthia: Scalable Grounded Persona Generation from Social Media Data</title>
    </titleInfo>

    <!-- Authors of the paper, in citation order. -->
    <name type="personal">
      <namePart type="given">Vahid</namePart>
      <namePart type="family">Rahimzadeh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Erfan</namePart>
      <namePart type="family">Moosavi Monazzah</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mohammad</namePart>
      <namePart type="given">Taher</namePart>
      <namePart type="family">Pilehvar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yadollah</namePart>
      <namePart type="family">Yaghoobzadeh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-390-6</identifier>
    </relatedItem>

    <abstract>Persona-driven simulations are increasingly used in computational social science, yet their validity critically depends on the fidelity of the underlying personas. Constructing virtual populations that are both authentic and scalable remains a central challenge. We introduce Synthia, a persona-generation framework that grounds LLM-generated personas in real social-media posts while delegating narrative construction to language models, using publicly available data from the Bluesky platform. Across multiple social-survey benchmarks, Synthia improves alignment with human opinion distributions over prior state-of-the-art approaches while relying on substantially smaller models. A multi-dimensional fairness and bias analysis shows that Synthia outperforms previous methods for most demographics across different dimensions. Uniquely, Synthia preserves interaction-graph structure among personas grounded in real social network users, enabling network-aware analysis, which we demonstrate through two homophily-focused case studies. Together, these results position Synthia as a practical and reliable framework for constructing scalable, high-fidelity, and equitable virtual populations.</abstract>
    <identifier type="citekey">rahimzadeh-etal-2026-synthia</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-long.2134/</url>
    </location>

    <!-- Position of the paper within the host volume: issue date and page range. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>45983</start>
        <end>46006</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Synthia: Scalable Grounded Persona Generation from Social Media Data
%A Rahimzadeh, Vahid
%A Moosavi Monazzah, Erfan
%A Pilehvar, Mohammad Taher
%A Yaghoobzadeh, Yadollah
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F rahimzadeh-etal-2026-synthia
%X Persona-driven simulations are increasingly used in computational social science, yet their validity critically depends on the fidelity of the underlying personas. Constructing virtual populations that are both authentic and scalable remains a central challenge. We introduce Synthia, a persona-generation framework that grounds LLM-generated personas in real social-media posts while delegating narrative construction to language models, using publicly available data from the Bluesky platform. Across multiple social-survey benchmarks, Synthia improves alignment with human opinion distributions over prior state-of-the-art approaches while relying on substantially smaller models. A multi-dimensional fairness and bias analysis shows that Synthia outperforms previous methods for most demographics across different dimensions. Uniquely, Synthia preserves interaction-graph structure among personas grounded in real social network users, enabling network-aware analysis, which we demonstrate through two homophily-focused case studies. Together, these results position Synthia as a practical and reliable framework for constructing scalable, high-fidelity, and equitable virtual populations.
%U https://aclanthology.org/2026.acl-long.2134/
%P 45983-46006
Markdown (Informal)
[Synthia: Scalable Grounded Persona Generation from Social Media Data](https://aclanthology.org/2026.acl-long.2134/) (Rahimzadeh et al., ACL 2026)
ACL
- Vahid Rahimzadeh, Erfan Moosavi Monazzah, Mohammad Taher Pilehvar, and Yadollah Yaghoobzadeh. 2026. Synthia: Scalable Grounded Persona Generation from Social Media Data. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 45983–46006, San Diego, California, United States. Association for Computational Linguistics.