% EMNLP 2025 Industry Track paper; ACL Anthology ID 2025.emnlp-industry.150.
% Abstract whitespace repaired (missing spaces after sentence breaks); wording unchanged.
@inproceedings{gou-etal-2025-advancing,
  title     = {Advancing {E-commerce} Merchants Telemarketing with Synthetic Data-Driven {LLMs}},
  author    = {Gou, Qi and
               Xia, Zehua and
               Juan, Li and
               Zhao, Qingyang and
               Yang, Wenjing},
  editor    = {Potdar, Saloni and
               Rojas-Barahona, Lina and
               Montella, Sebastien},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track},
  month     = nov,
  year      = {2025},
  address   = {Suzhou (China)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-industry.150/},
  pages     = {2146--2154},
  isbn      = {979-8-89176-333-3},
  abstract  = {Telemarketing towards merchants is considerably more complex than traditional dialogue systems. Given a user utterance, the response must not only follow the context but also strategically and naturally guide the conversation toward marketing objectives. A common approach is to fine-tune LLMs using high-quality dialogue data from top sales. However, we find that even after careful data cleaning, these data cannot be used directly due to two issues: (1) Poor strategy-following: Real-world conversations are highly random with much chit-chat topics, easily leading deviation from intended strategy. (2) Insufficient expert knowledge learning: Expert knowledge appears infrequently or not at all in limited collected corpus. To this end, we introduce a hybrid data synthesis framework with two main innovations. First, we unify the input schema with profile and strategy designed by top sales, and extract them via a Multi-task paradigm. In addition, we propose Role-playing Simulation and Session Prefix Completion to complementarily improve the strategy-following and inject long-tail expert knowledge into our fine-tuned model {--} TeleBot. Comprehensive online and offline evaluations demonstrate its effectiveness. In particular, in terms of the final marketing results {--} High Intention Rate, TeleBot reaches the performance level of the top 25{\%} of human sales.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gou-etal-2025-advancing">
<titleInfo>
<title>Advancing E-commerce Merchants Telemarketing with Synthetic Data-Driven LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qi</namePart>
<namePart type="family">Gou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zehua</namePart>
<namePart type="family">Xia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Li</namePart>
<namePart type="family">Juan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qingyang</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenjing</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saloni</namePart>
<namePart type="family">Potdar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lina</namePart>
<namePart type="family">Rojas-Barahona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastien</namePart>
<namePart type="family">Montella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou (China)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-333-3</identifier>
</relatedItem>
<abstract>Telemarketing towards merchants is considerably more complex than traditional dialogue systems. Given a user utterance, the response must not only follow the context but also strategically and naturally guide the conversation toward marketing objectives. A common approach is to fine-tune LLMs using high-quality dialogue data from top sales. However, we find that even after careful data cleaning, these data cannot be used directly due to two issues: (1) Poor strategy-following: Real-world conversations are highly random with much chit-chat topics, easily leading deviation from intended strategy. (2) Insufficient expert knowledge learning: Expert knowledge appears infrequently or not at all in limited collected corpus. To this end, we introduce a hybrid data synthesis framework with two main innovations. First, we unify the input schema with profile and strategy designed by top sales, and extract them via a Multi-task paradigm. In addition, we propose Role-playing Simulation and Session Prefix Completion to complementarily improve the strategy-following and inject long-tail expert knowledge into our fine-tuned model – TeleBot. Comprehensive online and offline evaluations demonstrate its effectiveness. In particular, in terms of the final marketing results – High Intention Rate, TeleBot reaches the performance level of the top 25% of human sales.</abstract>
<identifier type="citekey">gou-etal-2025-advancing</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-industry.150/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>2146</start>
<end>2154</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Advancing E-commerce Merchants Telemarketing with Synthetic Data-Driven LLMs
%A Gou, Qi
%A Xia, Zehua
%A Juan, Li
%A Zhao, Qingyang
%A Yang, Wenjing
%Y Potdar, Saloni
%Y Rojas-Barahona, Lina
%Y Montella, Sebastien
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou (China)
%@ 979-8-89176-333-3
%F gou-etal-2025-advancing
%X Telemarketing towards merchants is considerably more complex than traditional dialogue systems. Given a user utterance, the response must not only follow the context but also strategically and naturally guide the conversation toward marketing objectives. A common approach is to fine-tune LLMs using high-quality dialogue data from top sales. However, we find that even after careful data cleaning, these data cannot be used directly due to two issues: (1) Poor strategy-following: Real-world conversations are highly random with much chit-chat topics, easily leading deviation from intended strategy. (2) Insufficient expert knowledge learning: Expert knowledge appears infrequently or not at all in limited collected corpus. To this end, we introduce a hybrid data synthesis framework with two main innovations. First, we unify the input schema with profile and strategy designed by top sales, and extract them via a Multi-task paradigm. In addition, we propose Role-playing Simulation and Session Prefix Completion to complementarily improve the strategy-following and inject long-tail expert knowledge into our fine-tuned model – TeleBot. Comprehensive online and offline evaluations demonstrate its effectiveness. In particular, in terms of the final marketing results – High Intention Rate, TeleBot reaches the performance level of the top 25% of human sales.
%U https://aclanthology.org/2025.emnlp-industry.150/
%P 2146-2154
Markdown (Informal)
[Advancing E-commerce Merchants Telemarketing with Synthetic Data-Driven LLMs](https://aclanthology.org/2025.emnlp-industry.150/) (Gou et al., EMNLP 2025)
ACL