@inproceedings{torres-fonseca-etal-2026-safetyalfred,
title = "{S}afety{ALFRED}: Evaluating Safety-Conscious Planning of Vision Language Models",
author = "Torres-Fonseca, Josue and
Deng, Naihao and
Dai, Yinpei and
Storks, Shane and
Zhang, Yichi and
Mihalcea, Rada and
Kennington, Casey and
Chai, Joyce",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.1852/",
pages = "37190--37211",
ISBN = "979-8-89176-395-1",
abstract = "Multimodal Large Language Models (MLLMs) are increasingly adopted as autonomous agents in interactive environments, yet their ability to proactively address safety hazards remains insufficient. We introduce SafetyALFRED, built upon the embodied agent benchmark ALFRED, augmented with six categories of real-world kitchen hazards. While existing safety evaluations focus on hazard recognition through disembodied question answering (QA) settings, we evaluate eleven state-of-the-art models from the Qwen, Gemma, and Gemini families on not only hazard recognition, but also active risk mitigation through embodied task planning. Our experimental results reveal a significant alignment gap: while models can accurately recognize hazards in QA settings, average mitigation success rates for these hazards are low in comparison. Our findings demonstrate that static evaluations through QA are insufficient for physical safety, advocating for a paradigm shift toward benchmarks that prioritize multi-step corrective actions in embodied context."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="torres-fonseca-etal-2026-safetyalfred">
<titleInfo>
<title>SafetyALFRED: Evaluating Safety-Conscious Planning of Vision Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Josue</namePart>
<namePart type="family">Torres-Fonseca</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naihao</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yinpei</namePart>
<namePart type="family">Dai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shane</namePart>
<namePart type="family">Storks</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yichi</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rada</namePart>
<namePart type="family">Mihalcea</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Casey</namePart>
<namePart type="family">Kennington</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Chai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Multimodal Large Language Models (MLLMs) are increasingly adopted as autonomous agents in interactive environments, yet their ability to proactively address safety hazards remains insufficient. We introduce SafetyALFRED, built upon the embodied agent benchmark ALFRED, augmented with six categories of real-world kitchen hazards. While existing safety evaluations focus on hazard recognition through disembodied question answering (QA) settings, we evaluate eleven state-of-the-art models from the Qwen, Gemma, and Gemini families on not only hazard recognition, but also active risk mitigation through embodied task planning. Our experimental results reveal a significant alignment gap: while models can accurately recognize hazards in QA settings, average mitigation success rates for these hazards are low in comparison. Our findings demonstrate that static evaluations through QA are insufficient for physical safety, advocating for a paradigm shift toward benchmarks that prioritize multi-step corrective actions in embodied context.</abstract>
<identifier type="citekey">torres-fonseca-etal-2026-safetyalfred</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.1852/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>37190</start>
<end>37211</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SafetyALFRED: Evaluating Safety-Conscious Planning of Vision Language Models
%A Torres-Fonseca, Josue
%A Deng, Naihao
%A Dai, Yinpei
%A Storks, Shane
%A Zhang, Yichi
%A Mihalcea, Rada
%A Kennington, Casey
%A Chai, Joyce
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F torres-fonseca-etal-2026-safetyalfred
%X Multimodal Large Language Models (MLLMs) are increasingly adopted as autonomous agents in interactive environments, yet their ability to proactively address safety hazards remains insufficient. We introduce SafetyALFRED, built upon the embodied agent benchmark ALFRED, augmented with six categories of real-world kitchen hazards. While existing safety evaluations focus on hazard recognition through disembodied question answering (QA) settings, we evaluate eleven state-of-the-art models from the Qwen, Gemma, and Gemini families on not only hazard recognition, but also active risk mitigation through embodied task planning. Our experimental results reveal a significant alignment gap: while models can accurately recognize hazards in QA settings, average mitigation success rates for these hazards are low in comparison. Our findings demonstrate that static evaluations through QA are insufficient for physical safety, advocating for a paradigm shift toward benchmarks that prioritize multi-step corrective actions in embodied context.
%U https://aclanthology.org/2026.findings-acl.1852/
%P 37190-37211
Markdown (Informal)
[SafetyALFRED: Evaluating Safety-Conscious Planning of Vision Language Models](https://aclanthology.org/2026.findings-acl.1852/) (Torres-Fonseca et al., Findings 2026)
ACL
- Josue Torres-Fonseca, Naihao Deng, Yinpei Dai, Shane Storks, Yichi Zhang, Rada Mihalcea, Casey Kennington, and Joyce Chai. 2026. SafetyALFRED: Evaluating Safety-Conscious Planning of Vision Language Models. In Findings of the Association for Computational Linguistics: ACL 2026, pages 37190–37211, San Diego, California, United States. Association for Computational Linguistics.