@comment{
  ACL 2026 long paper introducing CADMate.
  NOTE(review): the exported abstract read "(, actions)" and "(, GPT-5)";
  the missing abbreviations were restored as "i.e." and "e.g." from context.
  Confirm against the published PDF.
}
@inproceedings{chen-etal-2026-cadmate,
  title     = {{CADMate}: Generating {CAD} Assembly Plan with Geometric Chain-of-Thought and Spatial Physical Rewards},
  author    = {Chen, Jiali and
               Fu, DingBa and
               Hei, Xusen and
               Liu, Yuhang and
               Chen, Yiyang and
               Xie, Jiayuan and
               Fan, Wenqi and
               Cai, Yi},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-long.834/},
  pages     = {18324--18348},
  isbn      = {979-8-89176-390-6},
  abstract  = {Computer-aided design (CAD) is crucial in prototyping complex 3D objects through precise geometric modeling. In practical design workflows, designers manually define assembly sequences for individual CAD parts, a process that is both time-consuming and expertise-intensive. To address this challenge, we formulate CAD assembly as a parametric action prediction task: given a reference design image and disassembled parts, the model predicts 6-DoF transformations (i.e., actions) to progressively assemble each part. This paradigm enables multimodal large language models (MLLMs) to solve the task through autoregressive action generation. While recent MLLMs demonstrate promising spatial reasoning, they struggle with fine-grained geometric structure understanding and physical collision avoidance during assembly. In this paper, we propose CADMate, an MLLM-based framework for sequential CAD assembly action generation. Our training strategy comprises three stages: (i) CAD domain adaptation for spatial geometry and position understanding, (ii) supervised fine-tuning with geometric chain-of-thought (CoT) reasoning for action generation, and (iii) reinforcement learning with spatial-physical rewards jointly optimize spatial accuracy and collision avoidance. Additionally, we also construct \textit{CADBuilder} dataset, comprising over 45\textit{K} CAD assemblies with annotated action sequences. Our experiments demonstrate that CADMate significantly outperforms existing prominent MLLMs (e.g., GPT-5), showing great potential in design applications.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2026-cadmate">
<titleInfo>
<title>CADMate: Generating CAD Assembly Plan with Geometric Chain-of-Thought and Spatial Physical Rewards</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jiali</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">DingBa</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xusen</namePart>
<namePart type="family">Hei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuhang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yiyang</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiayuan</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenqi</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-390-6</identifier>
</relatedItem>
<abstract>Computer-aided design (CAD) is crucial in prototyping complex 3D objects through precise geometric modeling. In practical design workflows, designers manually define assembly sequences for individual CAD parts, a process that is both time-consuming and expertise-intensive. To address this challenge, we formulate CAD assembly as a parametric action prediction task: given a reference design image and disassembled parts, the model predicts 6-DoF transformations (i.e., actions) to progressively assemble each part. This paradigm enables multimodal large language models (MLLMs) to solve the task through autoregressive action generation. While recent MLLMs demonstrate promising spatial reasoning, they struggle with fine-grained geometric structure understanding and physical collision avoidance during assembly. In this paper, we propose CADMate, an MLLM-based framework for sequential CAD assembly action generation. Our training strategy comprises three stages: (i) CAD domain adaptation for spatial geometry and position understanding, (ii) supervised fine-tuning with geometric chain-of-thought (CoT) reasoning for action generation, and (iii) reinforcement learning with spatial-physical rewards jointly optimize spatial accuracy and collision avoidance. Additionally, we also construct CADBuilder dataset, comprising over 45K CAD assemblies with annotated action sequences. Our experiments demonstrate that CADMate significantly outperforms existing prominent MLLMs (e.g., GPT-5), showing great potential in design applications.</abstract>
<identifier type="citekey">chen-etal-2026-cadmate</identifier>
<location>
<url>https://aclanthology.org/2026.acl-long.834/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>18324</start>
<end>18348</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CADMate: Generating CAD Assembly Plan with Geometric Chain-of-Thought and Spatial Physical Rewards
%A Chen, Jiali
%A Fu, DingBa
%A Hei, Xusen
%A Liu, Yuhang
%A Chen, Yiyang
%A Xie, Jiayuan
%A Fan, Wenqi
%A Cai, Yi
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F chen-etal-2026-cadmate
%X Computer-aided design (CAD) is crucial in prototyping complex 3D objects through precise geometric modeling. In practical design workflows, designers manually define assembly sequences for individual CAD parts, a process that is both time-consuming and expertise-intensive. To address this challenge, we formulate CAD assembly as a parametric action prediction task: given a reference design image and disassembled parts, the model predicts 6-DoF transformations (i.e., actions) to progressively assemble each part. This paradigm enables multimodal large language models (MLLMs) to solve the task through autoregressive action generation. While recent MLLMs demonstrate promising spatial reasoning, they struggle with fine-grained geometric structure understanding and physical collision avoidance during assembly. In this paper, we propose CADMate, an MLLM-based framework for sequential CAD assembly action generation. Our training strategy comprises three stages: (i) CAD domain adaptation for spatial geometry and position understanding, (ii) supervised fine-tuning with geometric chain-of-thought (CoT) reasoning for action generation, and (iii) reinforcement learning with spatial-physical rewards jointly optimize spatial accuracy and collision avoidance. Additionally, we also construct CADBuilder dataset, comprising over 45K CAD assemblies with annotated action sequences. Our experiments demonstrate that CADMate significantly outperforms existing prominent MLLMs (e.g., GPT-5), showing great potential in design applications.
%U https://aclanthology.org/2026.acl-long.834/
%P 18324-18348
Markdown (Informal)
[CADMate: Generating CAD Assembly Plan with Geometric Chain-of-Thought and Spatial Physical Rewards](https://aclanthology.org/2026.acl-long.834/) (Chen et al., ACL 2026)
ACL
- Jiali Chen, DingBa Fu, Xusen Hei, Yuhang Liu, Yiyang Chen, Jiayuan Xie, Wenqi Fan, and Yi Cai. 2026. CADMate: Generating CAD Assembly Plan with Geometric Chain-of-Thought and Spatial Physical Rewards. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 18324–18348, San Diego, California, United States. Association for Computational Linguistics.