% BibTeX record for the DFLOW paper (REALM 2025 workshop), as listed at aclanthology.org/2025.realm-1.2.
% Field values are delimited with braces; the month uses the predefined macro so styles can localise it.
@inproceedings{du-etal-2025-dflow,
  title     = {{DFLOW}: Diverse Dialogue Flow Simulation with Large Language Models},
  author    = {Du, Wanyu and
               Feng, Song and
               Gung, James and
               Sun, Lijia and
               Zhang, Yi and
               Mansour, Saab and
               Qi, Yanjun},
  editor    = {Kamalloo, Ehsan and
               Gontier, Nicolas and
               Lu, Xing Han and
               Dziri, Nouha and
               Murty, Shikhar and
               Lacoste, Alexandre},
  booktitle = {Proceedings of the 1st Workshop for Research on Agent Language Models (REALM 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.realm-1.2/},
  doi       = {10.18653/v1/2025.realm-1.2},
  pages     = {17--32},
  isbn      = {979-8-89176-264-0},
  abstract  = {Developing language model-based dialogue agents requires effective data to train models that can follow specific task logic. However, most existing data simulation methods focus on increasing diversity in language, topics, or dialogue acts at the utterance level, largely neglecting a critical aspect of task logic diversity at the dialogue level. This paper proposes a novel data simulation method designed to enhance the diversity of synthetic dialogues by focusing on task execution logic. Our method uses LLMs to generate decision tree-structured task plans, which enables the derivation of diverse dialogue trajectories for a given task. Each trajectory, referred to as a ``dialog flow'', guides the generation of a multi-turn dialogue that follows a unique trajectory. We apply this method to generate a task-oriented dialogue dataset comprising 3,886 dialogue flows across 15 different domains. We validate the effectiveness of this dataset using the next action prediction task, where models fine-tuned on our dataset outperform strong baselines, including GPT-4. Upon acceptance of this paper, we plan to release the code and data publicly.}
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey du-etal-2025-dflow: article metadata, its authors, and the host proceedings volume. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="du-etal-2025-dflow">
    <titleInfo>
      <title>DFLOW: Diverse Dialogue Flow Simulation with Large Language Models</title>
    </titleInfo>

    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Wanyu</namePart>
      <namePart type="family">Du</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Song</namePart>
      <namePart type="family">Feng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">James</namePart>
      <namePart type="family">Gung</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lijia</namePart>
      <namePart type="family">Sun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yi</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Saab</namePart>
      <namePart type="family">Mansour</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yanjun</namePart>
      <namePart type="family">Qi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 1st Workshop for Research on Agent Language Models (REALM 2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ehsan</namePart>
        <namePart type="family">Kamalloo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nicolas</namePart>
        <namePart type="family">Gontier</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Xing</namePart>
        <namePart type="given">Han</namePart>
        <namePart type="family">Lu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nouha</namePart>
        <namePart type="family">Dziri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shikhar</namePart>
        <namePart type="family">Murty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alexandre</namePart>
        <namePart type="family">Lacoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-264-0</identifier>
    </relatedItem>

    <abstract>Developing language model-based dialogue agents requires effective data to train models that can follow specific task logic. However, most existing data simulation methods focus on increasing diversity in language, topics, or dialogue acts at the utterance level, largely neglecting a critical aspect of task logic diversity at the dialogue level. This paper proposes a novel data simulation method designed to enhance the diversity of synthetic dialogues by focusing on task execution logic. Our method uses LLMs to generate decision tree-structured task plans, which enables the derivation of diverse dialogue trajectories for a given task. Each trajectory, referred to as a “dialog flow”, guides the generation of a multi-turn dialogue that follows a unique trajectory. We apply this method to generate a task-oriented dialogue dataset comprising 3,886 dialogue flows across 15 different domains. We validate the effectiveness of this dataset using the next action prediction task, where models fine-tuned on our dataset outperform strong baselines, including GPT-4. Upon acceptance of this paper, we plan to release the code and data publicly.</abstract>

    <!-- Identifiers and the landing-page location for the paper itself. -->
    <identifier type="citekey">du-etal-2025-dflow</identifier>
    <identifier type="doi">10.18653/v1/2025.realm-1.2</identifier>
    <location>
      <url>https://aclanthology.org/2025.realm-1.2/</url>
    </location>

    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>17</start>
        <end>32</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T DFLOW: Diverse Dialogue Flow Simulation with Large Language Models
%A Du, Wanyu
%A Feng, Song
%A Gung, James
%A Sun, Lijia
%A Zhang, Yi
%A Mansour, Saab
%A Qi, Yanjun
%Y Kamalloo, Ehsan
%Y Gontier, Nicolas
%Y Lu, Xing Han
%Y Dziri, Nouha
%Y Murty, Shikhar
%Y Lacoste, Alexandre
%S Proceedings of the 1st Workshop for Research on Agent Language Models (REALM 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-264-0
%F du-etal-2025-dflow
%X Developing language model-based dialogue agents requires effective data to train models that can follow specific task logic. However, most existing data simulation methods focus on increasing diversity in language, topics, or dialogue acts at the utterance level, largely neglecting a critical aspect of task logic diversity at the dialogue level. This paper proposes a novel data simulation method designed to enhance the diversity of synthetic dialogues by focusing on task execution logic. Our method uses LLMs to generate decision tree-structured task plans, which enables the derivation of diverse dialogue trajectories for a given task. Each trajectory, referred to as a “dialog flow”, guides the generation of a multi-turn dialogue that follows a unique trajectory. We apply this method to generate a task-oriented dialogue dataset comprising 3,886 dialogue flows across 15 different domains. We validate the effectiveness of this dataset using the next action prediction task, where models fine-tuned on our dataset outperform strong baselines, including GPT-4. Upon acceptance of this paper, we plan to release the code and data publicly.
%R 10.18653/v1/2025.realm-1.2
%U https://aclanthology.org/2025.realm-1.2/
%U https://doi.org/10.18653/v1/2025.realm-1.2
%P 17-32
Markdown (Informal)
[DFLOW: Diverse Dialogue Flow Simulation with Large Language Models](https://aclanthology.org/2025.realm-1.2/) (Du et al., REALM 2025)
ACL