@inproceedings{tong-etal-2026-train,
title = "How to Train a Real-World Silicon Concierge? Internalizing Complex Business Workflow to Only {O}ne{M}odel",
author = "Tong, Yongqi and
Feng, Xiaoyun and
Xue, Lyuxin and
Li, Jianshe and
Zhang, Xin and
Yang, Jiang-Ming",
editor = "Li, Yunyao and
Rehm, Georg and
Tu, Mei",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics ({ACL} 2026)",
month = jul,
year = "2026",
address = "San Diego, California, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-industry.91/",
pages = "1303--1316",
ISBN = "979-8-89176-394-4",
abstract = "Traditional industrial agents rely on modular pipelines, including Router, Retriever, Planner, Executor, Responder, Reviewer and so on, which inevitably fracture into a labyrinth of ad-hoc patches, leading to cascading errors and high latency. We propose OneModel, an applicable paradigm shift from external workflows to internalized knowledge representation. Unlike modular systems that slice fluid user intents into static steps, OneModel consolidates complex business logic and SOPs directly into the model{'}s parameters. Through Continual Pre-training (CPT) and logic-compilation SFT, we transform fragmented business rules into the model{'}s intuitive reasoning within a unified attention space. Deployed in our global financial service system, OneModel effectively breaks the impossible triangle of latency, accuracy, and complexity. Online A/B testing demonstrates end-to-end latency reduction of more than 50{\%} (18.7s $\rightarrow$ 8s) while the Intelligent Resolution Rate (IRR) jumps from 64.3{\%} to 83.3{\%}. The results demonstrate our paradigm OneModel effectively replaces brittle engineering logic with internalized cognitive intuition, offering a scalable and future-proof blueprint for transitioning industrial agents from complex, error-prone workflow to unified model architectures."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tong-etal-2026-train">
<titleInfo>
<title>How to Train a Real-World Silicon Concierge? Internalizing Complex Business Workflow to Only OneModel</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yongqi</namePart>
<namePart type="family">Tong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaoyun</namePart>
<namePart type="family">Feng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lyuxin</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jianshe</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiang-Ming</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yunyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Rehm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mei</namePart>
<namePart type="family">Tu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-394-4</identifier>
</relatedItem>
<abstract>Traditional industrial agents rely on modular pipelines, including Router, Retriever, Planner, Executor, Responder, Reviewer and so on, which inevitably fracture into a labyrinth of ad-hoc patches, leading to cascading errors and high latency. We propose OneModel, an applicable paradigm shift from external workflows to internalized knowledge representation. Unlike modular systems that slice fluid user intents into static steps, OneModel consolidates complex business logic and SOPs directly into the model’s parameters. Through Continual Pre-training (CPT) and logic-compilation SFT, we transform fragmented business rules into the model’s intuitive reasoning within a unified attention space. Deployed in our global financial service system, OneModel effectively breaks the impossible triangle of latency, accuracy, and complexity. Online A/B testing demonstrates end-to-end latency reduction of more than 50% (18.7s → 8s) while the Intelligent Resolution Rate (IRR) jumps from 64.3% to 83.3%. The results demonstrate our paradigm OneModel effectively replaces brittle engineering logic with internalized cognitive intuition, offering a scalable and future-proof blueprint for transitioning industrial agents from complex, error-prone workflow to unified model architectures.</abstract>
<identifier type="citekey">tong-etal-2026-train</identifier>
<location>
<url>https://aclanthology.org/2026.acl-industry.91/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>1303</start>
<end>1316</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How to Train a Real-World Silicon Concierge? Internalizing Complex Business Workflow to Only OneModel
%A Tong, Yongqi
%A Feng, Xiaoyun
%A Xue, Lyuxin
%A Li, Jianshe
%A Zhang, Xin
%A Yang, Jiang-Ming
%Y Li, Yunyao
%Y Rehm, Georg
%Y Tu, Mei
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-394-4
%F tong-etal-2026-train
%X Traditional industrial agents rely on modular pipelines, including Router, Retriever, Planner, Executor, Responder, Reviewer and so on, which inevitably fracture into a labyrinth of ad-hoc patches, leading to cascading errors and high latency. We propose OneModel, an applicable paradigm shift from external workflows to internalized knowledge representation. Unlike modular systems that slice fluid user intents into static steps, OneModel consolidates complex business logic and SOPs directly into the model’s parameters. Through Continual Pre-training (CPT) and logic-compilation SFT, we transform fragmented business rules into the model’s intuitive reasoning within a unified attention space. Deployed in our global financial service system, OneModel effectively breaks the impossible triangle of latency, accuracy, and complexity. Online A/B testing demonstrates end-to-end latency reduction of more than 50% (18.7s → 8s) while the Intelligent Resolution Rate (IRR) jumps from 64.3% to 83.3%. The results demonstrate our paradigm OneModel effectively replaces brittle engineering logic with internalized cognitive intuition, offering a scalable and future-proof blueprint for transitioning industrial agents from complex, error-prone workflow to unified model architectures.
%U https://aclanthology.org/2026.acl-industry.91/
%P 1303-1316
Markdown (Informal)
[How to Train a Real-World Silicon Concierge? Internalizing Complex Business Workflow to Only OneModel](https://aclanthology.org/2026.acl-industry.91/) (Tong et al., ACL 2026)
ACL