@inproceedings{gonzalez-etal-2026-multimodal,
title = "Multimodal Safety Evaluation in Generative Agent Social Simulations",
author = "Gonzalez, Alhim Adonai Vera and
Hinojosa, Carlos and
Sanchez, Karen and
Hamid, Haidar Bin and
Kim, Donghoon and
Ghanem, Bernard",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-long.1915/",
pages = "41295--41310",
ISBN = "979-8-89176-390-6",
abstract = "Can generative agents be trusted in multimodal environments? Despite recent advances, agents remain limited in their ability to reason about safety, coherence, and trust across modalities. We introduce a reproducible simulation framework to evaluate generative agents in three aspects: (1) safety improvement over time via iterative plan revision in multimodal scenarios; (2) detection of unsafe activities across social contexts; and (3) social dynamics, measured through interaction and acceptance rates. These multimodal agents are evaluated using metrics that quantify plan revisions and unsafe-to-safe conversions. Experiments show that while agents detect direct multimodal contradictions, they often fail to align local revisions with global safety, achieving only a 55{\%} success rate in correcting unsafe plans. We release a dataset of 1,000 multimodal plans, yielding more than 600,000 simulation steps. Notably, 45{\%} of unsafe actions are accepted when paired with misleading visual cues, revealing a strong tendency to overtrust visual content. Code is available at https://github.com/AdonaiVera/X-CASE"
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gonzalez-etal-2026-multimodal">
<titleInfo>
<title>Multimodal Safety Evaluation in Generative Agent Social Simulations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alhim</namePart>
<namePart type="given">Adonai</namePart>
<namePart type="given">Vera</namePart>
<namePart type="family">Gonzalez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="family">Hinojosa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karen</namePart>
<namePart type="family">Sanchez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haidar</namePart>
<namePart type="given">Bin</namePart>
<namePart type="family">Hamid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Donghoon</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bernard</namePart>
<namePart type="family">Ghanem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-390-6</identifier>
</relatedItem>
<abstract>Can generative agents be trusted in multimodal environments? Despite recent advances, agents remain limited in their ability to reason about safety, coherence, and trust across modalities. We introduce a reproducible simulation framework to evaluate generative agents in three aspects: (1) safety improvement over time via iterative plan revision in multimodal scenarios; (2) detection of unsafe activities across social contexts; and (3) social dynamics, measured through interaction and acceptance rates. These multimodal agents are evaluated using metrics that quantify plan revisions and unsafe-to-safe conversions. Experiments show that while agents detect direct multimodal contradictions, they often fail to align local revisions with global safety, achieving only a 55% success rate in correcting unsafe plans. We release a dataset of 1,000 multimodal plans, yielding more than 600,000 simulation steps. Notably, 45% of unsafe actions are accepted when paired with misleading visual cues, revealing a strong tendency to overtrust visual content. Code is available at https://github.com/AdonaiVera/X-CASE</abstract>
<identifier type="citekey">gonzalez-etal-2026-multimodal</identifier>
<location>
<url>https://aclanthology.org/2026.acl-long.1915/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>41295</start>
<end>41310</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multimodal Safety Evaluation in Generative Agent Social Simulations
%A Gonzalez, Alhim Adonai Vera
%A Hinojosa, Carlos
%A Sanchez, Karen
%A Hamid, Haidar Bin
%A Kim, Donghoon
%A Ghanem, Bernard
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F gonzalez-etal-2026-multimodal
%X Can generative agents be trusted in multimodal environments? Despite recent advances, agents remain limited in their ability to reason about safety, coherence, and trust across modalities. We introduce a reproducible simulation framework to evaluate generative agents in three aspects: (1) safety improvement over time via iterative plan revision in multimodal scenarios; (2) detection of unsafe activities across social contexts; and (3) social dynamics, measured through interaction and acceptance rates. These multimodal agents are evaluated using metrics that quantify plan revisions and unsafe-to-safe conversions. Experiments show that while agents detect direct multimodal contradictions, they often fail to align local revisions with global safety, achieving only a 55% success rate in correcting unsafe plans. We release a dataset of 1,000 multimodal plans, yielding more than 600,000 simulation steps. Notably, 45% of unsafe actions are accepted when paired with misleading visual cues, revealing a strong tendency to overtrust visual content. Code is available at https://github.com/AdonaiVera/X-CASE
%U https://aclanthology.org/2026.acl-long.1915/
%P 41295-41310
Markdown (Informal)
[Multimodal Safety Evaluation in Generative Agent Social Simulations](https://aclanthology.org/2026.acl-long.1915/) (Gonzalez et al., ACL 2026)
ACL
- Alhim Adonai Vera Gonzalez, Carlos Hinojosa, Karen Sanchez, Haidar Bin Hamid, Donghoon Kim, and Bernard Ghanem. 2026. Multimodal Safety Evaluation in Generative Agent Social Simulations. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 41295–41310, San Diego, California, United States. Association for Computational Linguistics.