% BibTeX record for the paper published at https://aclanthology.org/2026.smm4h-1.44/
% Acronyms are brace-protected as whole words so sentence-casing styles keep their capitals.
@inproceedings{kaur-etal-2026-team,
  title     = {Team {TIET} at {\#}{SMM4H-HeaRD} 2026: Fine-tuned Biomedical Transformers with Language-Balanced Sampling for Patient Metadata and Multilingual {ADE} Detection},
  author    = {Kaur, Divrose and
               Bedi, Jatin and
               Singh, Jasmeet},
  editor    = {Lopez-Garcia, Guillermo and
               Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM4H-HeaRD} 2026) Workshop and Shared Tasks},
  month     = jul,
  year      = {2026},
  address   = {San Diego, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.smm4h-1.44/},
  pages     = {268--271},
  isbn      = {979-8-89176-432-3},
  abstract  = {We present Team TIET{'}s systems for two shared tasks at {\#}SMM4H-HeaRD 2026: Task 5 (detection of patient metadata in SARS-CoV-2 sequencing papers) and Task 1 (multilingual adverse drug event detection across six languages plus an unseen Farsi subset). For Task 5 we explore iterative LLM prompting followed by fine-tuning BiomedBERT-base with weighted cross-entropy loss and probability threshold optimization, achieving F1 = 0.760 on the official test set (above the competition mean of 0.729). For Task 1 we fine-tune XLM-RoBERTa-base with a combined language- and class-balanced sampling strategy and per-language threshold tuning, achieving macro F1 = 0.497 overall (0.608 excluding the unseen Farsi subset). We report empirical findings on BERT+LLM ensemble failure with bimodal probability distributions, the superiority of base over large model variants under limited data, and the importance of language-balanced gradient contribution in multilingual classification.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record: one conference paper (three authors) hosted in an edited proceedings volume (two editors). -->
  <mods ID="kaur-etal-2026-team">
    <titleInfo>
      <title>Team TIET at #SMM4H-HeaRD 2026: Fine-tuned Biomedical Transformers with Language-Balanced Sampling for Patient Metadata and Multilingual ADE Detection</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Divrose</namePart>
      <namePart type="family">Kaur</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jatin</namePart>
      <namePart type="family">Bedi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jasmeet</namePart>
      <namePart type="family">Singh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Guillermo</namePart>
        <namePart type="family">Lopez-Garcia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Graciela</namePart>
        <namePart type="family">Gonzalez-Hernandez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-432-3</identifier>
    </relatedItem>
    <abstract>We present Team TIET’s systems for two shared tasks at #SMM4H-HeaRD 2026: Task 5 (detection of patient metadata in SARS-CoV-2 sequencing papers) and Task 1 (multilingual adverse drug event detection across six languages plus an unseen Farsi subset). For Task 5 we explore iterative LLM prompting followed by fine-tuning BiomedBERT-base with weighted cross-entropy loss and probability threshold optimization, achieving F1 = 0.760 on the official test set (above the competition mean of 0.729). For Task 1 we fine-tune XLM-RoBERTa-base with a combined language- and class-balanced sampling strategy and per-language threshold tuning, achieving macro F1 = 0.497 overall (0.608 excluding the unseen Farsi subset). We report empirical findings on BERT+LLM ensemble failure with bimodal probability distributions, the superiority of base over large model variants under limited data, and the importance of language-balanced gradient contribution in multilingual classification.</abstract>
    <identifier type="citekey">kaur-etal-2026-team</identifier>
    <location>
      <url>https://aclanthology.org/2026.smm4h-1.44/</url>
    </location>
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>268</start>
        <end>271</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Team TIET at #SMM4H-HeaRD 2026: Fine-tuned Biomedical Transformers with Language-Balanced Sampling for Patient Metadata and Multilingual ADE Detection
%A Kaur, Divrose
%A Bedi, Jatin
%A Singh, Jasmeet
%Y Lopez-Garcia, Guillermo
%Y Gonzalez-Hernandez, Graciela
%S Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, United States
%@ 979-8-89176-432-3
%F kaur-etal-2026-team
%X We present Team TIET’s systems for two shared tasks at #SMM4H-HeaRD 2026: Task 5 (detection of patient metadata in SARS-CoV-2 sequencing papers) and Task 1 (multilingual adverse drug event detection across six languages plus an unseen Farsi subset). For Task 5 we explore iterative LLM prompting followed by fine-tuning BiomedBERT-base with weighted cross-entropy loss and probability threshold optimization, achieving F1 = 0.760 on the official test set (above the competition mean of 0.729). For Task 1 we fine-tune XLM-RoBERTa-base with a combined language- and class-balanced sampling strategy and per-language threshold tuning, achieving macro F1 = 0.497 overall (0.608 excluding the unseen Farsi subset). We report empirical findings on BERT+LLM ensemble failure with bimodal probability distributions, the superiority of base over large model variants under limited data, and the importance of language-balanced gradient contribution in multilingual classification.
%U https://aclanthology.org/2026.smm4h-1.44/
%P 268-271
Markdown (Informal)
[Team TIET at #SMM4H-HeaRD 2026: Fine-tuned Biomedical Transformers with Language-Balanced Sampling for Patient Metadata and Multilingual ADE Detection](https://aclanthology.org/2026.smm4h-1.44/) (Kaur et al., SMM4H 2026)
ACL