% SMMTech system description paper for SMM4H-HeaRD 2026 Shared Task 2 (ACL Anthology ID 2026.smm4h-1.15).
@inproceedings{cristea-2026-smmtech,
    title     = {{SMMTech} at {\#}{SMM4H-HeaRD} 2026: Detection of Insomnia in Clinical Notes},
    author    = {Cristea, Emilia-Ioana},
    editor    = {Lopez-Garcia, Guillermo and
                 Gonzalez-Hernandez, Graciela},
    booktitle = {Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM4H-HeaRD} 2026) Workshop and Shared Tasks},
    month     = jul,
    year      = {2026},
    address   = {San Diego, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.smm4h-1.15/},
    pages     = {88--92},
    isbn      = {979-8-89176-432-3},
    abstract  = {This paper describes the participation of team SMMTech in the SMM4H-HeaRD 2026 Shared Task 2: Detection of Insomnia in Clinical Notes. We present a comparative architectural study exploring the friction between extractive token-classification models and generative Large Language Models (LLMs) in clinical span extraction, on the MIMIC-III Clinical Database. During the validation phase we established baselines using encoder-only transformers such as BERT, ClinicalBERT, BigBird and Clinical BigBird. For the official test phase, we deployed a 4-bit quantized generative hybrid pipeline using Llama3-Med42-8B to evaluate its multi-hop reasoning capabilities. While the generative pipeline achieved an F1-score of 0.4783 on Subtask 1 (Classification), it struggled with exact span matching on Subtask 2. In this paper we present the mechanical limitations of zero-shot JSON extraction and the necessity of decoupling clinical reasoning from character-level span extraction.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for citekey cristea-2026-smmtech; the relatedItem of type "host" describes the proceedings volume. -->
  <mods ID="cristea-2026-smmtech">
    <titleInfo>
      <title>SMMTech at #SMM4H-HeaRD 2026: Detection of Insomnia in Clinical Notes</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Emilia-Ioana</namePart>
      <namePart type="family">Cristea</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Guillermo</namePart>
        <namePart type="family">Lopez-Garcia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Graciela</namePart>
        <namePart type="family">Gonzalez-Hernandez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-432-3</identifier>
    </relatedItem>
    <abstract>This paper describes the participation of team SMMTech in the SMM4H-HeaRD 2026 Shared Task 2: Detection of Insomnia in Clinical Notes. We present a comparative architectural study exploring the friction between extractive token-classification models and generative Large Language Models (LLMs) in clinical span extraction, on the MIMIC-III Clinical Database. During the validation phase we established baselines using encoder-only transformers such as BERT, ClinicalBERT, BigBird and Clinical BigBird. For the official test phase, we deployed a 4-bit quantized generative hybrid pipeline using Llama3-Med42-8B to evaluate its multi-hop reasoning capabilities. While the generative pipeline achieved an F1-score of 0.4783 on Subtask 1 (Classification), it struggled with exact span matching on Subtask 2. In this paper we present the mechanical limitations of zero-shot JSON extraction and the necessity of decoupling clinical reasoning from character-level span extraction.</abstract>
    <identifier type="citekey">cristea-2026-smmtech</identifier>
    <location>
      <url>https://aclanthology.org/2026.smm4h-1.15/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>88</start>
        <end>92</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T SMMTech at #SMM4H-HeaRD 2026: Detection of Insomnia in Clinical Notes
%A Cristea, Emilia-Ioana
%Y Lopez-Garcia, Guillermo
%Y Gonzalez-Hernandez, Graciela
%S Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, United States
%@ 979-8-89176-432-3
%F cristea-2026-smmtech
%X This paper describes the participation of team SMMTech in the SMM4H-HeaRD 2026 Shared Task 2: Detection of Insomnia in Clinical Notes. We present a comparative architectural study exploring the friction between extractive token-classification models and generative Large Language Models (LLMs) in clinical span extraction, on the MIMIC-III Clinical Database. During the validation phase we established baselines using encoder-only transformers such as BERT, ClinicalBERT, BigBird and Clinical BigBird. For the official test phase, we deployed a 4-bit quantized generative hybrid pipeline using Llama3-Med42-8B to evaluate its multi-hop reasoning capabilities. While the generative pipeline achieved an F1-score of 0.4783 on Subtask 1 (Classification), it struggled with exact span matching on Subtask 2. In this paper we present the mechanical limitations of zero-shot JSON extraction and the necessity of decoupling clinical reasoning from character-level span extraction.
%U https://aclanthology.org/2026.smm4h-1.15/
%P 88-92
Markdown (Informal)
[SMMTech at #SMM4H-HeaRD 2026: Detection of Insomnia in Clinical Notes](https://aclanthology.org/2026.smm4h-1.15/) (Cristea, SMM4H 2026)
ACL