% ACL Anthology record 2026.smm4h-1.30 (SMM4H-HeaRD 2026 shared-task system paper).
% Case-sensitive names are braced as whole words so sentence-casing styles keep
% them intact; {\#} stays a separate group so the following letters are not
% treated as part of a BibTeX "special character" and case-folded.
@inproceedings{dey-etal-2026-cuet-diagnlp,
  title     = {{CUET\_DiagNLP} at {\#}{SMM4H-HeaRD} 2026: Per-Axis {TNM} Staging from Pathology Reports and Opioid Impact Span Detection from Social Media},
  author    = {Dey, Shuva and
               Barua, Priyangshu and
               Habib, Mohammad Ashfak},
  editor    = {Lopez-Garcia, Guillermo and
               Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM4H-HeaRD} 2026) Workshop and Shared Tasks},
  month     = jul,
  year      = {2026},
  address   = {San Diego, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.smm4h-1.30/},
  pages     = {182--186},
  isbn      = {979-8-89176-432-3},
  abstract  = {In this paper, we describe systems for two {\#}SMM4H-HeaRD 2026 shared tasks. Task 6 asks for per-axis TNM cancer staging from free-text TCGA pathology reports under severe label imbalance and long-document constraints. We fine-tune GatorTron-base separately on each axis using Focal loss with class weights and a pooled [CLS]{--}mean representation, reaching macro F1 of 0.700 (T), 0.774 (N), and 0.640 (M) on test set 2 against a baseline of 0.454, 0.591, and 0.554 respectively. Task 7 asks for span-level detection of opioid-related ClinicalImpacts and SocialImpacts in first-person Reddit posts. We combine DeBERTa-large and PubMedBERT (two seeds each) in a uniform-weight ensemble with boundary-aware loss, entity-replacement augmentation, and a first-person post filter, achieving strict F1 of 0.51 and relaxed F1 of 0.60, above both the task mean (0.46 / 0.55) and median (0.48 / 0.58).}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper identified by citekey dey-etal-2026-cuet-diagnlp -->
  <mods ID="dey-etal-2026-cuet-diagnlp">
    <titleInfo>
      <title>CUET_DiagNLP at #SMM4H-HeaRD 2026: Per-Axis TNM Staging from Pathology Reports and Opioid Impact Span Detection from Social Media</title>
    </titleInfo>
    <!-- Authors, in byline order -->
    <name type="personal">
      <namePart type="given">Shuva</namePart>
      <namePart type="family">Dey</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Priyangshu</namePart>
      <namePart type="family">Barua</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mohammad</namePart>
      <namePart type="given">Ashfak</namePart>
      <namePart type="family">Habib</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing this paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Guillermo</namePart>
        <namePart type="family">Lopez-Garcia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Graciela</namePart>
        <namePart type="family">Gonzalez-Hernandez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-432-3</identifier>
    </relatedItem>
    <abstract>In this paper, we describe systems for two #SMM4H-HeaRD 2026 shared tasks. Task 6 asks for per-axis TNM cancer staging from free-text TCGA pathology reports under severe label imbalance and long-document constraints. We fine-tune GatorTron-base separately on each axis using Focal loss with class weights and a pooled [CLS]–mean representation, reaching macro F1 of 0.700 (T), 0.774 (N), and 0.640 (M) on test set 2 against a baseline of 0.454, 0.591, and 0.554 respectively. Task 7 asks for span-level detection of opioid-related ClinicalImpacts and SocialImpacts in first-person Reddit posts. We combine DeBERTa-large and PubMedBERT (two seeds each) in a uniform-weight ensemble with boundary-aware loss, entity-replacement augmentation, and a first-person post filter, achieving strict F1 of 0.51 and relaxed F1 of 0.60, above both the task mean (0.46 / 0.55) and median (0.48 / 0.58).</abstract>
    <identifier type="citekey">dey-etal-2026-cuet-diagnlp</identifier>
    <location>
      <url>https://aclanthology.org/2026.smm4h-1.30/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>182</start>
        <end>186</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T CUET_DiagNLP at #SMM4H-HeaRD 2026: Per-Axis TNM Staging from Pathology Reports and Opioid Impact Span Detection from Social Media
%A Dey, Shuva
%A Barua, Priyangshu
%A Habib, Mohammad Ashfak
%Y Lopez-Garcia, Guillermo
%Y Gonzalez-Hernandez, Graciela
%S Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, United States
%@ 979-8-89176-432-3
%F dey-etal-2026-cuet-diagnlp
%X In this paper, we describe systems for two #SMM4H-HeaRD 2026 shared tasks. Task 6 asks for per-axis TNM cancer staging from free-text TCGA pathology reports under severe label imbalance and long-document constraints. We fine-tune GatorTron-base separately on each axis using Focal loss with class weights and a pooled [CLS]–mean representation, reaching macro F1 of 0.700 (T), 0.774 (N), and 0.640 (M) on test set 2 against a baseline of 0.454, 0.591, and 0.554 respectively. Task 7 asks for span-level detection of opioid-related ClinicalImpacts and SocialImpacts in first-person Reddit posts. We combine DeBERTa-large and PubMedBERT (two seeds each) in a uniform-weight ensemble with boundary-aware loss, entity-replacement augmentation, and a first-person post filter, achieving strict F1 of 0.51 and relaxed F1 of 0.60, above both the task mean (0.46 / 0.55) and median (0.48 / 0.58).
%U https://aclanthology.org/2026.smm4h-1.30/
%P 182-186
Markdown (Informal)
[CUET_DiagNLP at #SMM4H-HeaRD 2026: Per-Axis TNM Staging from Pathology Reports and Opioid Impact Span Detection from Social Media](https://aclanthology.org/2026.smm4h-1.30/) (Dey et al., SMM4H 2026)
ACL