% Conference paper in Findings of the ACL: NAACL 2025 (pages 4586--4604).
% The acronym in the title is brace-protected so sentence-casing styles keep it upper-case.
@inproceedings{bao-etal-2025-sfmss,
  title     = {{SFMSS}: Service Flow aware Medical Scenario Simulation for Conversational Data Generation},
  author    = {Bao, Zhijie and
               Liu, Qingyun and
               Huang, Xuanjing and
               Wei, Zhongyu},
  editor    = {Chiruzzo, Luis and
               Ritter, Alan and
               Wang, Lu},
  booktitle = {Findings of the Association for Computational Linguistics: NAACL 2025},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-naacl.259/},
  doi       = {10.18653/v1/2025.findings-naacl.259},
  pages     = {4586--4604},
  isbn      = {979-8-89176-195-7},
  abstract  = {Medical-specific Large Language Models (LLMs) have demonstrated impressive performance on medical-related exams and tasks. Despite their success in single-turn question and answering, instruction-tuned LLMs often falter in real-world healthcare applications, highlighting a disconnect between existing instruction datasets and practical contexts. To address this issue, we propose Service Flow aware Medical Scenario Simulation (SFMSS), a simulation framework designed for medical conversational data generation. SFMSS employs three key strategies to ensure the quality of the data generation. The use of Authentic Seed Data ensures alignment of real-world distributions. Diverse Patient Simulation enables simulated patients to exhibit distinct communication styles and complex behavioral logic. Service Flow Control ensures that conversations progress in alignment with medical objectives. We construct a dataset targeting on outpatient reception through SFMSS, named SFMSS-CD. Building on this dataset, we develop a model called SFMSS-Nurse. We conduct both automatic and human evaluations, involving 15 users and 15 clinical experts, to assess the effectiveness of SFMSS. The results demonstrate that SFMSS-Nurse outperforms all baselines, including the current state-of-the-art model GPT-4o, and aligns with human preferences and clinical demands.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for the paper with citekey bao-etal-2025-sfmss. -->
  <mods ID="bao-etal-2025-sfmss">
    <titleInfo>
      <title>SFMSS: Service Flow aware Medical Scenario Simulation for Conversational Data Generation</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Zhijie</namePart>
      <namePart type="family">Bao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Qingyun</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xuanjing</namePart>
      <namePart type="family">Huang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhongyu</namePart>
      <namePart type="family">Wei</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: NAACL 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Luis</namePart>
        <namePart type="family">Chiruzzo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alan</namePart>
        <namePart type="family">Ritter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lu</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Albuquerque, New Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-195-7</identifier>
    </relatedItem>
    <abstract>Medical-specific Large Language Models (LLMs) have demonstrated impressive performance on medical-related exams and tasks. Despite their success in single-turn question and answering, instruction-tuned LLMs often falter in real-world healthcare applications, highlighting a disconnect between existing instruction datasets and practical contexts. To address this issue, we propose Service Flow aware Medical Scenario Simulation (SFMSS), a simulation framework designed for medical conversational data generation. SFMSS employs three key strategies to ensure the quality of the data generation. The use of Authentic Seed Data ensures alignment of real-world distributions. Diverse Patient Simulation enables simulated patients to exhibit distinct communication styles and complex behavioral logic. Service Flow Control ensures that conversations progress in alignment with medical objectives. We construct a dataset targeting on outpatient reception through SFMSS, named SFMSS-CD. Building on this dataset, we develop a model called SFMSS-Nurse. We conduct both automatic and human evaluations, involving 15 users and 15 clinical experts, to assess the effectiveness of SFMSS. The results demonstrate that SFMSS-Nurse outperforms all baselines, including the current state-of-the-art model GPT-4o, and aligns with human preferences and clinical demands.</abstract>
    <identifier type="citekey">bao-etal-2025-sfmss</identifier>
    <identifier type="doi">10.18653/v1/2025.findings-naacl.259</identifier>
    <location>
      <url>https://aclanthology.org/2025.findings-naacl.259/</url>
    </location>
    <!-- Issue date and page range of the paper within the host volume. -->
    <part>
      <date>2025-04</date>
      <extent unit="page">
        <start>4586</start>
        <end>4604</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T SFMSS: Service Flow aware Medical Scenario Simulation for Conversational Data Generation
%A Bao, Zhijie
%A Liu, Qingyun
%A Huang, Xuanjing
%A Wei, Zhongyu
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Findings of the Association for Computational Linguistics: NAACL 2025
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-195-7
%F bao-etal-2025-sfmss
%X Medical-specific Large Language Models (LLMs) have demonstrated impressive performance on medical-related exams and tasks. Despite their success in single-turn question and answering, instruction-tuned LLMs often falter in real-world healthcare applications, highlighting a disconnect between existing instruction datasets and practical contexts. To address this issue, we propose Service Flow aware Medical Scenario Simulation (SFMSS), a simulation framework designed for medical conversational data generation. SFMSS employs three key strategies to ensure the quality of the data generation. The use of Authentic Seed Data ensures alignment of real-world distributions. Diverse Patient Simulation enables simulated patients to exhibit distinct communication styles and complex behavioral logic. Service Flow Control ensures that conversations progress in alignment with medical objectives. We construct a dataset targeting on outpatient reception through SFMSS, named SFMSS-CD. Building on this dataset, we develop a model called SFMSS-Nurse. We conduct both automatic and human evaluations, involving 15 users and 15 clinical experts, to assess the effectiveness of SFMSS. The results demonstrate that SFMSS-Nurse outperforms all baselines, including the current state-of-the-art model GPT-4o, and aligns with human preferences and clinical demands.
%R 10.18653/v1/2025.findings-naacl.259
%U https://aclanthology.org/2025.findings-naacl.259/
%U https://doi.org/10.18653/v1/2025.findings-naacl.259
%P 4586-4604
Markdown (Informal)
[SFMSS: Service Flow aware Medical Scenario Simulation for Conversational Data Generation](https://aclanthology.org/2025.findings-naacl.259/) (Bao et al., Findings 2025)
ACL