@inproceedings{goyal-etal-2025-specialtyscribe,
title = "{S}pecialty{S}cribe: Enhancing {SOAP} note Scribing for Medical Specialties using {LLM}{'}s",
author = "Goyal, Sagar and
Rastogi, Eti and
Zhao, Fen and
Yuan, Dong and
Beinstein, Andrew",
editor = "Ananiadou, Sophia and
Demner-Fushman, Dina and
Gupta, Deepak and
Thompson, Paul",
booktitle = "Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health)",
month = may,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.cl4health-1.4/",
doi = "10.18653/v1/2025.cl4health-1.4",
pages = "34--45",
ISBN = "979-8-89176-238-1",
abstract = "The healthcare industry has accumulated vast amounts of clinical data, much of which has traditionally been unstructured, including medical records, clinical data, patient communications, and visit notes. Clinician-patient conversations form a crucial part of medical records, with the resulting medical note serving as the ground truth for future interactions and treatment plans. Generating concise and accurate SOAP notes is critical for quality patient care and is especially challenging in specialty care, where relevance, clarity, and adherence to clinician preferences are paramount. These requirements make general-purpose LLMs unsuitable for producing high-quality specialty notes. While recent LLMs like GPT-4 and Sonnet 3.5 have shown promise, their high cost, size, latency, and privacy issues remain barriers for many healthcare providers.We introduce SpecialtyScribe, a modular pipeline for generating specialty-specific medical notes. It features three components: an Information Extractor to capture relevant data, a Context Retriever to verify and augment content from transcripts, and a Note Writer to produce high quality notes. Our framework and in-house models outperform similarly sized open-source models by over 12{\%} on ROUGE metrics.Additionally, these models match top closed-source LLMs' performance while being under 1{\%} of their size. We specifically evaluate our framework for oncology, with the potential for adaptation to other specialties."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="goyal-etal-2025-specialtyscribe">
<titleInfo>
<title>SpecialtyScribe: Enhancing SOAP note Scribing for Medical Specialties using LLM’s</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sagar</namePart>
<namePart type="family">Goyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eti</namePart>
<namePart type="family">Rastogi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fen</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dong</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Beinstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deepak</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Thompson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-238-1</identifier>
</relatedItem>
<abstract>The healthcare industry has accumulated vast amounts of clinical data, much of which has traditionally been unstructured, including medical records, clinical data, patient communications, and visit notes. Clinician-patient conversations form a crucial part of medical records, with the resulting medical note serving as the ground truth for future interactions and treatment plans. Generating concise and accurate SOAP notes is critical for quality patient care and is especially challenging in specialty care, where relevance, clarity, and adherence to clinician preferences are paramount. These requirements make general-purpose LLMs unsuitable for producing high-quality specialty notes. While recent LLMs like GPT-4 and Sonnet 3.5 have shown promise, their high cost, size, latency, and privacy issues remain barriers for many healthcare providers.We introduce SpecialtyScribe, a modular pipeline for generating specialty-specific medical notes. It features three components: an Information Extractor to capture relevant data, a Context Retriever to verify and augment content from transcripts, and a Note Writer to produce high quality notes. Our framework and in-house models outperform similarly sized open-source models by over 12% on ROUGE metrics.Additionally, these models match top closed-source LLMs’ performance while being under 1% of their size. We specifically evaluate our framework for oncology, with the potential for adaptation to other specialties.</abstract>
<identifier type="citekey">goyal-etal-2025-specialtyscribe</identifier>
<identifier type="doi">10.18653/v1/2025.cl4health-1.4</identifier>
<location>
<url>https://aclanthology.org/2025.cl4health-1.4/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>34</start>
<end>45</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SpecialtyScribe: Enhancing SOAP note Scribing for Medical Specialties using LLM’s
%A Goyal, Sagar
%A Rastogi, Eti
%A Zhao, Fen
%A Yuan, Dong
%A Beinstein, Andrew
%Y Ananiadou, Sophia
%Y Demner-Fushman, Dina
%Y Gupta, Deepak
%Y Thompson, Paul
%S Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health)
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-238-1
%F goyal-etal-2025-specialtyscribe
%X The healthcare industry has accumulated vast amounts of clinical data, much of which has traditionally been unstructured, including medical records, clinical data, patient communications, and visit notes. Clinician-patient conversations form a crucial part of medical records, with the resulting medical note serving as the ground truth for future interactions and treatment plans. Generating concise and accurate SOAP notes is critical for quality patient care and is especially challenging in specialty care, where relevance, clarity, and adherence to clinician preferences are paramount. These requirements make general-purpose LLMs unsuitable for producing high-quality specialty notes. While recent LLMs like GPT-4 and Sonnet 3.5 have shown promise, their high cost, size, latency, and privacy issues remain barriers for many healthcare providers.We introduce SpecialtyScribe, a modular pipeline for generating specialty-specific medical notes. It features three components: an Information Extractor to capture relevant data, a Context Retriever to verify and augment content from transcripts, and a Note Writer to produce high quality notes. Our framework and in-house models outperform similarly sized open-source models by over 12% on ROUGE metrics.Additionally, these models match top closed-source LLMs’ performance while being under 1% of their size. We specifically evaluate our framework for oncology, with the potential for adaptation to other specialties.
%R 10.18653/v1/2025.cl4health-1.4
%U https://aclanthology.org/2025.cl4health-1.4/
%U https://doi.org/10.18653/v1/2025.cl4health-1.4
%P 34-45
Markdown (Informal)
[SpecialtyScribe: Enhancing SOAP note Scribing for Medical Specialties using LLM’s](https://aclanthology.org/2025.cl4health-1.4/) (Goyal et al., CL4Health 2025)
ACL