@inproceedings{naskar-conway-2026-novicetrio,
title = "{N}ovice{T}rio in {\#}{SMM}4{H}-{H}ea{RD} 2026: Hybrid Clinical Transformer Ensembles for Insomnia Detection and Evidence Extraction from Clinical Notes",
author = "Naskar, Abir and
Conway, Mike",
editor = "Lopez-Garcia, Guillermo and
Gonzalez-Hernandez, Graciela",
booktitle = "Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM}4{H}-{H}ea{RD} 2026) Workshop and Shared Tasks",
month = jul,
year = "2026",
address = "San Diego, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.smm4h-1.51/",
pages = "338--344",
ISBN = "979-8-89176-432-3",
abstract = "We present two systems for the {\#}SMM4H-HeaRD 2026 Task 2 shared task of automated insomnia detection from clinical notes. Our system addresses both subtasks: (1) binary insomnia classification and (2) multi-label rule prediction with evidence span extraction. For Subtask 1, we employ an ensemble architecture combining Qwen3-4B-Instruct and Bio{\_}ClinicalBERT to capture both general semantic reasoning and domain-specific clinical representations. The framework utilizes chunk-based processing with overlapping token windows to handle long clinical notes efficiently. For Subtask 2, we develop a dual-head multi-task transformer model that jointly predicts insomnia labels and token-level evidence spans using BIO tagging. To improve clinical relevance, we additionally incorporate sentence-level filtering using sentence-transformer embeddings and similarity-based retrieval of insomnia-related contexts. Experimental results suggest competitive performance relative to the shared task mean and median scores across both subtasks. Our best Subtask 1 system achieves a recall of 0.9474, surpassing the shared-task mean and median recall, while our Subtask 2 system exceeds the mean and median scores across label classification, exact match, and partial match metrics. The end-to-end implementation is publicly available on GitHub."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="naskar-conway-2026-novicetrio">
<titleInfo>
<title>NoviceTrio in #SMM4H-HeaRD 2026: Hybrid Clinical Transformer Ensembles for Insomnia Detection and Evidence Extraction from Clinical Notes</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abir</namePart>
<namePart type="family">Naskar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Conway</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Guillermo</namePart>
<namePart type="family">Lopez-Garcia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graciela</namePart>
<namePart type="family">Gonzalez-Hernandez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-432-3</identifier>
</relatedItem>
<abstract>We present two systems for the #SMM4H-HeaRD 2026 Task 2 shared task of automated insomnia detection from clinical notes. Our system addresses both subtasks: (1) binary insomnia classification and (2) multi-label rule prediction with evidence span extraction. For Subtask 1, we employ an ensemble architecture combining Qwen3-4B-Instruct and Bio_ClinicalBERT to capture both general semantic reasoning and domain-specific clinical representations. The framework utilizes chunk-based processing with overlapping token windows to handle long clinical notes efficiently. For Subtask 2, we develop a dual-head multi-task transformer model that jointly predicts insomnia labels and token-level evidence spans using BIO tagging. To improve clinical relevance, we additionally incorporate sentence-level filtering using sentence-transformer embeddings and similarity-based retrieval of insomnia-related contexts. Experimental results suggest competitive performance relative to the shared task mean and median scores across both subtasks. Our best Subtask 1 system achieves a recall of 0.9474, surpassing the shared-task mean and median recall, while our Subtask 2 system exceeds the mean and median scores across label classification, exact match, and partial match metrics. The end-to-end implementation is publicly available on GitHub.</abstract>
<identifier type="citekey">naskar-conway-2026-novicetrio</identifier>
<location>
<url>https://aclanthology.org/2026.smm4h-1.51/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>338</start>
<end>344</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T NoviceTrio in #SMM4H-HeaRD 2026: Hybrid Clinical Transformer Ensembles for Insomnia Detection and Evidence Extraction from Clinical Notes
%A Naskar, Abir
%A Conway, Mike
%Y Lopez-Garcia, Guillermo
%Y Gonzalez-Hernandez, Graciela
%S Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, United States
%@ 979-8-89176-432-3
%F naskar-conway-2026-novicetrio
%X We present two systems for the #SMM4H-HeaRD 2026 Task 2 shared task of automated insomnia detection from clinical notes. Our system addresses both subtasks: (1) binary insomnia classification and (2) multi-label rule prediction with evidence span extraction. For Subtask 1, we employ an ensemble architecture combining Qwen3-4B-Instruct and Bio_ClinicalBERT to capture both general semantic reasoning and domain-specific clinical representations. The framework utilizes chunk-based processing with overlapping token windows to handle long clinical notes efficiently. For Subtask 2, we develop a dual-head multi-task transformer model that jointly predicts insomnia labels and token-level evidence spans using BIO tagging. To improve clinical relevance, we additionally incorporate sentence-level filtering using sentence-transformer embeddings and similarity-based retrieval of insomnia-related contexts. Experimental results suggest competitive performance relative to the shared task mean and median scores across both subtasks. Our best Subtask 1 system achieves a recall of 0.9474, surpassing the shared-task mean and median recall, while our Subtask 2 system exceeds the mean and median scores across label classification, exact match, and partial match metrics. The end-to-end implementation is publicly available on GitHub.
%U https://aclanthology.org/2026.smm4h-1.51/
%P 338-344
Markdown (Informal)
[NoviceTrio in #SMM4H-HeaRD 2026: Hybrid Clinical Transformer Ensembles for Insomnia Detection and Evidence Extraction from Clinical Notes](https://aclanthology.org/2026.smm4h-1.51/) (Naskar & Conway, SMM4H 2026)
ACL