@inproceedings{sundar-etal-2025-hidden,
title = "Hidden Forms: A Dataset to Fill Masked Interfaces from Language Commands",
author = "Sundar, Anirudh and
Richardson, Christopher Gordon and
Gay, William and
Reichman, Benjamin and
Heck, Larry",
editor = "Kamalloo, Ehsan and
Gontier, Nicolas and
Lu, Xing Han and
Dziri, Nouha and
Murty, Shikhar and
Lacoste, Alexandre",
booktitle = "Proceedings of the 1st Workshop for Research on Agent Language Models (REALM 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.realm-1.7/",
doi = "10.18653/v1/2025.realm-1.7",
pages = "89--96",
ISBN = "979-8-89176-264-0",
abstract = "This paper introduces Hidden Forms (hFORMS), a dataset of natural language commands paired with user interfaces with masked visual context. By obscuring specific UI elements, the dataset challenges Computer-Using Agents to parse natural language instructions and infer the correct bounding box locations by leveraging UI context. Furthermore, hFORMS contains three distinct masking strategies representing progressive difficulty levels. Additionally, we explore parameter-efficient fine-tuning approaches using Vision-Language models from the Llama and Qwen series, demonstrating that fine-tuning on mobile domains results in more than 5x improvement in zero-shot domain adaptation performance when identifying bounding boxes on the desktop and web domains."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sundar-etal-2025-hidden">
<titleInfo>
<title>Hidden Forms: A Dataset to Fill Masked Interfaces from Language Commands</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anirudh</namePart>
<namePart type="family">Sundar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="given">Gordon</namePart>
<namePart type="family">Richardson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="family">Gay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Reichman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Larry</namePart>
<namePart type="family">Heck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop for Research on Agent Language Models (REALM 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ehsan</namePart>
<namePart type="family">Kamalloo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicolas</namePart>
<namePart type="family">Gontier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xing</namePart>
<namePart type="given">Han</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nouha</namePart>
<namePart type="family">Dziri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shikhar</namePart>
<namePart type="family">Murty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandre</namePart>
<namePart type="family">Lacoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-264-0</identifier>
</relatedItem>
<abstract>This paper introduces Hidden Forms (hFORMS), a dataset of natural language commands paired with user interfaces with masked visual context. By obscuring specific UI elements, the dataset challenges Computer-Using Agents to parse natural language instructions and infer the correct bounding box locations by leveraging UI context. Furthermore, hFORMS contains three distinct masking strategies representing progressive difficulty levels. Additionally, we explore parameter-efficient fine-tuning approaches using Vision-Language models from the Llama and Qwen series, demonstrating that fine-tuning on mobile domains results in more than 5x improvement in zero-shot domain adaptation performance when identifying bounding boxes on the desktop and web domains.</abstract>
<identifier type="citekey">sundar-etal-2025-hidden</identifier>
<identifier type="doi">10.18653/v1/2025.realm-1.7</identifier>
<location>
<url>https://aclanthology.org/2025.realm-1.7/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>89</start>
<end>96</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Hidden Forms: A Dataset to Fill Masked Interfaces from Language Commands
%A Sundar, Anirudh
%A Richardson, Christopher Gordon
%A Gay, William
%A Reichman, Benjamin
%A Heck, Larry
%Y Kamalloo, Ehsan
%Y Gontier, Nicolas
%Y Lu, Xing Han
%Y Dziri, Nouha
%Y Murty, Shikhar
%Y Lacoste, Alexandre
%S Proceedings of the 1st Workshop for Research on Agent Language Models (REALM 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-264-0
%F sundar-etal-2025-hidden
%X This paper introduces Hidden Forms (hFORMS), a dataset of natural language commands paired with user interfaces with masked visual context. By obscuring specific UI elements, the dataset challenges Computer-Using Agents to parse natural language instructions and infer the correct bounding box locations by leveraging UI context. Furthermore, hFORMS contains three distinct masking strategies representing progressive difficulty levels. Additionally, we explore parameter-efficient fine-tuning approaches using Vision-Language models from the Llama and Qwen series, demonstrating that fine-tuning on mobile domains results in more than 5x improvement in zero-shot domain adaptation performance when identifying bounding boxes on the desktop and web domains.
%R 10.18653/v1/2025.realm-1.7
%U https://aclanthology.org/2025.realm-1.7/
%U https://doi.org/10.18653/v1/2025.realm-1.7
%P 89-96
Markdown (Informal)
[Hidden Forms: A Dataset to Fill Masked Interfaces from Language Commands](https://aclanthology.org/2025.realm-1.7/) (Sundar et al., REALM 2025)
ACL