@inproceedings{yin-etal-2025-unlocking,
title = "Unlocking Smarter Device Control: Foresighted Planning with a World Model-Driven Code Execution Approach",
author = "Yin, Xiaoran and
Luo, Xu and
Wu, Hao and
Gao, Lianli and
Song, Jingkuan",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.213/",
pages = "3982--4005",
ISBN = "979-8-89176-335-7",
abstract = "The automatic control of mobile devices is essential for efficiently performing complex tasks that involve multiple sequential steps. However, these tasks pose significant challenges due to the limited environmental information available at each step, primarily through visual observations. As a result, current approaches, which typically rely on reactive policies, focus solely on immediate observations and often lead to suboptimal decision-making. To address this problem, we propose Foresighted Planning with World Model-Driven Code Execution (FPWC),a framework that prioritizes natural language understanding and structured reasoning to enhance the agent{'}s global understanding of the environment by developing a task-oriented, refinable world model at the outset of the task. Foresighted actions are subsequently generated through iterative planning within this world model, executed in the form of executable code. Extensive experiments conducted in simulated environments and on real mobile devices demonstrate that our method outperforms previous approaches, particularly achieving a 44.4{\%} relative improvement in task success rate compared to the state-of-the-art in the simulated environment."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yin-etal-2025-unlocking">
<titleInfo>
<title>Unlocking Smarter Device Control: Foresighted Planning with a World Model-Driven Code Execution Approach</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiaoran</namePart>
<namePart type="family">Yin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xu</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hao</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lianli</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingkuan</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>The automatic control of mobile devices is essential for efficiently performing complex tasks that involve multiple sequential steps. However, these tasks pose significant challenges due to the limited environmental information available at each step, primarily through visual observations. As a result, current approaches, which typically rely on reactive policies, focus solely on immediate observations and often lead to suboptimal decision-making. To address this problem, we propose Foresighted Planning with World Model-Driven Code Execution (FPWC),a framework that prioritizes natural language understanding and structured reasoning to enhance the agent’s global understanding of the environment by developing a task-oriented, refinable world model at the outset of the task. Foresighted actions are subsequently generated through iterative planning within this world model, executed in the form of executable code. Extensive experiments conducted in simulated environments and on real mobile devices demonstrate that our method outperforms previous approaches, particularly achieving a 44.4% relative improvement in task success rate compared to the state-of-the-art in the simulated environment.</abstract>
<identifier type="citekey">yin-etal-2025-unlocking</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.213/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>3982</start>
<end>4005</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unlocking Smarter Device Control: Foresighted Planning with a World Model-Driven Code Execution Approach
%A Yin, Xiaoran
%A Luo, Xu
%A Wu, Hao
%A Gao, Lianli
%A Song, Jingkuan
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F yin-etal-2025-unlocking
%X The automatic control of mobile devices is essential for efficiently performing complex tasks that involve multiple sequential steps. However, these tasks pose significant challenges due to the limited environmental information available at each step, primarily through visual observations. As a result, current approaches, which typically rely on reactive policies, focus solely on immediate observations and often lead to suboptimal decision-making. To address this problem, we propose Foresighted Planning with World Model-Driven Code Execution (FPWC),a framework that prioritizes natural language understanding and structured reasoning to enhance the agent’s global understanding of the environment by developing a task-oriented, refinable world model at the outset of the task. Foresighted actions are subsequently generated through iterative planning within this world model, executed in the form of executable code. Extensive experiments conducted in simulated environments and on real mobile devices demonstrate that our method outperforms previous approaches, particularly achieving a 44.4% relative improvement in task success rate compared to the state-of-the-art in the simulated environment.
%U https://aclanthology.org/2025.findings-emnlp.213/
%P 3982-4005
Markdown (Informal)
[Unlocking Smarter Device Control: Foresighted Planning with a World Model-Driven Code Execution Approach](https://aclanthology.org/2025.findings-emnlp.213/) (Yin et al., Findings 2025)
ACL