@inproceedings{zhang-etal-2025-agentcpm,
title = "{A}gent{CPM}-{GUI}: Building Mobile-Use Agents with Reinforcement Fine-Tuning",
author = "Zhang, Zhong and
Lu, Yaxi and
Fu, Yikun and
Huo, Yupeng and
Yang, Shenzhi and
Wu, Yesai and
Si, Han and
Cong, Xin and
Chen, Haotian and
Lin, Yankai and
Xie, Jie and
Zhou, Wei and
Xu, Wang and
Zhang, Yuanheng and
Su, Zhou and
Zhai, Zhongwu and
Liu, Xiaoming and
Mei, Yudong and
Xu, Jianming and
Tian, Hongyan and
Wang, Chongyi and
Chen, Chi and
Yao, Yuan and
Liu, Zhiyuan and
Sun, Maosong",
editor = {Habernal, Ivan and
Schulam, Peter and
Tiedemann, J{\"o}rg},
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-demos.12/",
doi = "10.18653/v1/2025.emnlp-demos.12",
pages = "155--180",
ISBN = "979-8-89176-334-0",
abstract = "Large language model agents have enabled GUI-based automation, particularly for mobile devices. However, deployment remains limited by noisy data, poor generalization, and lack of support for non-English GUIs. In this work, we present AgentCPM-GUI, an 8B-parameter GUI agent built for robust and efficient on-device GUI interaction. Our training pipeline includes grounding-aware pre-training to enhance perception, supervised fine-tuning on high-quality Chinese and English trajectories to imitate human-like actions, and reinforcement fine-tuning with GRPO to improve reasoning capability. AgentCPM-GUI achieves promising performance on five public benchmarks and our proposed Chinese benchmark CAGUI. To facilitate reproducibility and further research, we publicly release all code, model checkpoint, and evaluation data at: https://github.com/OpenBMB/AgentCPM-GUI"
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2025-agentcpm">
<titleInfo>
<title>AgentCPM-GUI: Building Mobile-Use Agents with Reinforcement Fine-Tuning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhong</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yaxi</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yikun</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yupeng</namePart>
<namePart type="family">Huo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shenzhi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yesai</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Han</namePart>
<namePart type="family">Si</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Cong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haotian</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yankai</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jie</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wang</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuanheng</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhou</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhongwu</namePart>
<namePart type="family">Zhai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaoming</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yudong</namePart>
<namePart type="family">Mei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jianming</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongyan</namePart>
<namePart type="family">Tian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chongyi</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuan</namePart>
<namePart type="family">Yao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiyuan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Habernal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Schulam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-334-0</identifier>
</relatedItem>
<abstract>Large language model agents have enabled GUI-based automation, particularly for mobile devices. However, deployment remains limited by noisy data, poor generalization, and lack of support for non-English GUIs. In this work, we present AgentCPM-GUI, an 8B-parameter GUI agent built for robust and efficient on-device GUI interaction. Our training pipeline includes grounding-aware pre-training to enhance perception, supervised fine-tuning on high-quality Chinese and English trajectories to imitate human-like actions, and reinforcement fine-tuning with GRPO to improve reasoning capability. AgentCPM-GUI achieves promising performance on five public benchmarks and our proposed Chinese benchmark CAGUI. To facilitate reproducibility and further research, we publicly release all code, model checkpoint, and evaluation data at: https://github.com/OpenBMB/AgentCPM-GUI</abstract>
<identifier type="citekey">zhang-etal-2025-agentcpm</identifier>
<identifier type="doi">10.18653/v1/2025.emnlp-demos.12</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-demos.12/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>155</start>
<end>180</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AgentCPM-GUI: Building Mobile-Use Agents with Reinforcement Fine-Tuning
%A Zhang, Zhong
%A Lu, Yaxi
%A Fu, Yikun
%A Huo, Yupeng
%A Yang, Shenzhi
%A Wu, Yesai
%A Si, Han
%A Cong, Xin
%A Chen, Haotian
%A Lin, Yankai
%A Xie, Jie
%A Zhou, Wei
%A Xu, Wang
%A Zhang, Yuanheng
%A Su, Zhou
%A Zhai, Zhongwu
%A Liu, Xiaoming
%A Mei, Yudong
%A Xu, Jianming
%A Tian, Hongyan
%A Wang, Chongyi
%A Chen, Chi
%A Yao, Yuan
%A Liu, Zhiyuan
%A Sun, Maosong
%Y Habernal, Ivan
%Y Schulam, Peter
%Y Tiedemann, Jörg
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-334-0
%F zhang-etal-2025-agentcpm
%X Large language model agents have enabled GUI-based automation, particularly for mobile devices. However, deployment remains limited by noisy data, poor generalization, and lack of support for non-English GUIs. In this work, we present AgentCPM-GUI, an 8B-parameter GUI agent built for robust and efficient on-device GUI interaction. Our training pipeline includes grounding-aware pre-training to enhance perception, supervised fine-tuning on high-quality Chinese and English trajectories to imitate human-like actions, and reinforcement fine-tuning with GRPO to improve reasoning capability. AgentCPM-GUI achieves promising performance on five public benchmarks and our proposed Chinese benchmark CAGUI. To facilitate reproducibility and further research, we publicly release all code, model checkpoint, and evaluation data at: https://github.com/OpenBMB/AgentCPM-GUI
%R 10.18653/v1/2025.emnlp-demos.12
%U https://aclanthology.org/2025.emnlp-demos.12/
%U https://doi.org/10.18653/v1/2025.emnlp-demos.12
%P 155-180
Markdown (Informal)
[AgentCPM-GUI: Building Mobile-Use Agents with Reinforcement Fine-Tuning](https://aclanthology.org/2025.emnlp-demos.12/) (Zhang et al., EMNLP 2025)
ACL
- Zhong Zhang, Yaxi Lu, Yikun Fu, Yupeng Huo, Shenzhi Yang, Yesai Wu, Han Si, Xin Cong, Haotian Chen, Yankai Lin, Jie Xie, Wei Zhou, Wang Xu, Yuanheng Zhang, Zhou Su, Zhongwu Zhai, Xiaoming Liu, Yudong Mei, Jianming Xu, Hongyan Tian, Chongyi Wang, Chi Chen, Yuan Yao, Zhiyuan Liu, and Maosong Sun. 2025. AgentCPM-GUI: Building Mobile-Use Agents with Reinforcement Fine-Tuning. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pages 155–180, Suzhou, China. Association for Computational Linguistics.