% Workshop paper (shared-task system description); see the url field for the
% ACL Anthology landing page.
% Brace protection in title/booktitle wraps whole tokens so that styles which
% sentence-case titles keep the acronyms intact. The \# is left OUTSIDE the
% brace group on purpose: a top-level group that starts with a control
% sequence is a BibTeX "special character" and its letters would still be
% lower-cased by such styles.
@inproceedings{santhosh-yu-2026-nu,
    title     = {{NU\_DeepHealthNLP} at \#{SMM4H-HeaRD} 2026: Entity-Conditioned Generation and a Four-Stage Pipeline for Automated {SOAP} Note Generation},
    author    = {Santhosh, Thanya Mysore and
                 Yu, Deahan},
    editor    = {Lopez-Garcia, Guillermo and
                 Gonzalez-Hernandez, Graciela},
    booktitle = {Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM4H-HeaRD} 2026) Workshop and Shared Tasks},
    month     = jul,
    year      = {2026},
    address   = {San Diego, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.smm4h-1.17/},
    pages     = {103--107},
    isbn      = {979-8-89176-432-3},
    abstract  = {We describe two system submissions to Task 4 of the SMM4H-HeaRD 2026 Shared Task on automated SOAP note generation from doctor{--}patient dialogues. Our first submission is a standalone entity-conditioned generation model: Mistral-7B-Instruct-v0.1 fine-tuned with QLoRA on 8,529 MedSynth training dialogues, where both training and inference prompts include clinical entities extracted and grouped by SOAP section. Our second submission is a four-stage modular pipeline that additionally incorporates a hybrid retrieval stage and a rule-based verification stage. The key finding of this work is that incorporating structured clinical domain knowledge, in the form of NER entities grouped by SOAP section, directly into the generation prompt produces consistent and reliable improvements over dialogue-only generation. Our four-stage pipeline submission achieved an average score of 0.54 on the official test set, ranking first on the shared task leaderboard.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="santhosh-yu-2026-nu">
    <titleInfo>
      <title>NU_DeepHealthNLP at #SMM4H-HeaRD 2026: Entity-Conditioned Generation and a Four-Stage Pipeline for Automated SOAP Note Generation</title>
    </titleInfo>
    <!-- Authors of the paper, one <name> element each. -->
    <name type="personal">
      <namePart type="given">Thanya</namePart>
      <namePart type="given">Mysore</namePart>
      <namePart type="family">Santhosh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Deahan</namePart>
      <namePart type="family">Yu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Guillermo</namePart>
        <namePart type="family">Lopez-Garcia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Graciela</namePart>
        <namePart type="family">Gonzalez-Hernandez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-432-3</identifier>
    </relatedItem>
    <abstract>We describe two system submissions to Task 4 of the SMM4H-HeaRD 2026 Shared Task on automated SOAP note generation from doctor–patient dialogues. Our first submission is a standalone entity-conditioned generation model: Mistral-7B-Instruct-v0.1 fine-tuned with QLoRA on 8,529 MedSynth training dialogues, where both training and inference prompts include clinical entities extracted and grouped by SOAP section. Our second submission is a four-stage modular pipeline that additionally incorporates a hybrid retrieval stage and a rule-based verification stage. The key finding of this work is that incorporating structured clinical domain knowledge, in the form of NER entities grouped by SOAP section, directly into the generation prompt produces consistent and reliable improvements over dialogue-only generation. Our four-stage pipeline submission achieved an average score of 0.54 on the official test set, ranking first on the shared task leaderboard.</abstract>
    <identifier type="citekey">santhosh-yu-2026-nu</identifier>
    <location>
      <url>https://aclanthology.org/2026.smm4h-1.17/</url>
    </location>
    <!-- Issue date and page range of the paper. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>103</start>
        <end>107</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T NU_DeepHealthNLP at #SMM4H-HeaRD 2026: Entity-Conditioned Generation and a Four-Stage Pipeline for Automated SOAP Note Generation
%A Santhosh, Thanya Mysore
%A Yu, Deahan
%Y Lopez-Garcia, Guillermo
%Y Gonzalez-Hernandez, Graciela
%S Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, United States
%@ 979-8-89176-432-3
%F santhosh-yu-2026-nu
%X We describe two system submissions to Task 4 of the SMM4H-HeaRD 2026 Shared Task on automated SOAP note generation from doctor–patient dialogues. Our first submission is a standalone entity-conditioned generation model: Mistral-7B-Instruct-v0.1 fine-tuned with QLoRA on 8,529 MedSynth training dialogues, where both training and inference prompts include clinical entities extracted and grouped by SOAP section. Our second submission is a four-stage modular pipeline that additionally incorporates a hybrid retrieval stage and a rule-based verification stage. The key finding of this work is that incorporating structured clinical domain knowledge, in the form of NER entities grouped by SOAP section, directly into the generation prompt produces consistent and reliable improvements over dialogue-only generation. Our four-stage pipeline submission achieved an average score of 0.54 on the official test set, ranking first on the shared task leaderboard.
%U https://aclanthology.org/2026.smm4h-1.17/
%P 103-107
Markdown (Informal)
[NU_DeepHealthNLP at #SMM4H-HeaRD 2026: Entity-Conditioned Generation and a Four-Stage Pipeline for Automated SOAP Note Generation](https://aclanthology.org/2026.smm4h-1.17/) (Santhosh & Yu, SMM4H 2026)
ACL