@inproceedings{choubey-etal-2025-turning,
title = "Turning Conversations into Workflows: A Framework to Extract and Evaluate Dialog Workflows for Service {AI} Agents",
author = "Choubey, Prafulla Kumar and
Peng, Xiangyu and
Bhagavath, Shilpa and
Xiong, Caiming and
Pentyala, Shiva Kumar and
Wu, Chien-Sheng",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.203/",
doi = "10.18653/v1/2025.findings-acl.203",
pages = "3933--3954",
ISBN = "979-8-89176-256-5",
abstract = "Automated service agents require well-structured workflows to deliver consistent and accurate responses to customer queries. However, such workflows are often undocumented, and their automatic extraction from conversations remains largely unexplored. In this work, we present a novel framework for extracting and evaluating dialog workflows from historical interactions. Our extraction process involves two key stages: (1) a retrieval step to select relevant conversations based on key procedural elements, and (2) a structured workflow generation step using question-answer-based chain-of-thought (QA-CoT) prompting. To comprehensively evaluate the quality of the extracted workflows, we introduce an automated simulation framework with agent and customer bots that measures their effectiveness in resolving customer issues. Extensive experiments on the ABCD and SynthABCD datasets show that our QA-CoT technique improves workflow extraction by 12.16{\%} in average macro accuracy over the baseline. Moreover, our evaluation method closely aligns with human assessments, offering a reliable and scalable framework for future research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="choubey-etal-2025-turning">
<titleInfo>
<title>Turning Conversations into Workflows: A Framework to Extract and Evaluate Dialog Workflows for Service AI Agents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Prafulla</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Choubey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiangyu</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shilpa</namePart>
<namePart type="family">Bhagavath</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Caiming</namePart>
<namePart type="family">Xiong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiva</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Pentyala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chien-Sheng</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Automated service agents require well-structured workflows to deliver consistent and accurate responses to customer queries. However, such workflows are often undocumented, and their automatic extraction from conversations remains largely unexplored. In this work, we present a novel framework for extracting and evaluating dialog workflows from historical interactions. Our extraction process involves two key stages: (1) a retrieval step to select relevant conversations based on key procedural elements, and (2) a structured workflow generation step using question-answer-based chain-of-thought (QA-CoT) prompting. To comprehensively evaluate the quality of the extracted workflows, we introduce an automated simulation framework with agent and customer bots that measures their effectiveness in resolving customer issues. Extensive experiments on the ABCD and SynthABCD datasets show that our QA-CoT technique improves workflow extraction by 12.16% in average macro accuracy over the baseline. Moreover, our evaluation method closely aligns with human assessments, offering a reliable and scalable framework for future research.</abstract>
<identifier type="citekey">choubey-etal-2025-turning</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.203</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.203/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>3933</start>
<end>3954</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Turning Conversations into Workflows: A Framework to Extract and Evaluate Dialog Workflows for Service AI Agents
%A Choubey, Prafulla Kumar
%A Peng, Xiangyu
%A Bhagavath, Shilpa
%A Xiong, Caiming
%A Pentyala, Shiva Kumar
%A Wu, Chien-Sheng
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F choubey-etal-2025-turning
%X Automated service agents require well-structured workflows to deliver consistent and accurate responses to customer queries. However, such workflows are often undocumented, and their automatic extraction from conversations remains largely unexplored. In this work, we present a novel framework for extracting and evaluating dialog workflows from historical interactions. Our extraction process involves two key stages: (1) a retrieval step to select relevant conversations based on key procedural elements, and (2) a structured workflow generation step using question-answer-based chain-of-thought (QA-CoT) prompting. To comprehensively evaluate the quality of the extracted workflows, we introduce an automated simulation framework with agent and customer bots that measures their effectiveness in resolving customer issues. Extensive experiments on the ABCD and SynthABCD datasets show that our QA-CoT technique improves workflow extraction by 12.16% in average macro accuracy over the baseline. Moreover, our evaluation method closely aligns with human assessments, offering a reliable and scalable framework for future research.
%R 10.18653/v1/2025.findings-acl.203
%U https://aclanthology.org/2025.findings-acl.203/
%U https://doi.org/10.18653/v1/2025.findings-acl.203
%P 3933-3954
Markdown (Informal)
[Turning Conversations into Workflows: A Framework to Extract and Evaluate Dialog Workflows for Service AI Agents](https://aclanthology.org/2025.findings-acl.203/) (Choubey et al., Findings 2025)
ACL