% Workshop paper record. Acronyms in title/booktitle are brace-protected as
% whole words so sentence-casing styles keep their capitalisation; {\#} stays
% in its own group so the acronym letters are not part of a special character.
@inproceedings{singh-2026-gladiators,
  title     = {Gladiators at {\#}{SMM4H}{--}{HeaRD} 2026: Multi-Seed {XLM-RoBERTa} Ensemble with Focal Loss and Per-Language Threshold Optimization for Multilingual Adverse Drug Event Detection},
  author    = {Singh, Ankit Kumar},
  editor    = {Lopez-Garcia, Guillermo and
               Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM4H-HeaRD} 2026) Workshop and Shared Tasks},
  month     = jul,
  year      = {2026},
  address   = {San Diego, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.smm4h-1.2/},
  pages     = {7--11},
  isbn      = {979-8-89176-432-3},
  abstract  = {This paper describes the Gladiators system for Task 1 of the SMM4H 2026 shared task on binary classification of adverse drug event (ADE) mentions in multilingual social media posts. Our system fine-tunes three XLM-RoBERTa large models with different random seeds using focal loss ({\ensuremath{\alpha}}=0.75, {\ensuremath{\gamma}}=2.0) and 3{\texttimes} positive oversampling, then averages their predicted probabilities and applies per-language threshold optimization. On the development set, our ensemble achieves a pooled binary F1 of 0.7505. On the official test set{---}which introduced surprise Farsi comprising 35.5{\%} of samples{---}our system achieves F1 = 0.6039, above the competition mean (0.5465) and median (0.5798). We evaluated eleven approaches and document key negative results. Post evaluation, a six-model cross-regime ensemble improved dev F1 to 0.7585.},
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="singh-2026-gladiators">
<titleInfo>
<title>Gladiators at #SMM4H–HeaRD 2026: Multi-Seed XLM-RoBERTa Ensemble with Focal Loss and Per-Language Threshold Optimization for Multilingual Adverse Drug Event Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ankit</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Guillermo</namePart>
<namePart type="family">Lopez-Garcia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graciela</namePart>
<namePart type="family">Gonzalez-Hernandez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-432-3</identifier>
</relatedItem>
<abstract>This paper describes the Gladiators system for Task 1 of the SMM4H 2026 shared task on binary classification of adverse drug event (ADE) mentions in multilingual social media posts. Our system fine-tunes three XLM-RoBERTa large models with different random seeds using focal loss (α=0.75, γ=2.0) and 3× positive oversampling, then averages their predicted probabilities and applies per-language threshold optimization. On the development set, our ensemble achieves a pooled binary F1 of 0.7505. On the official test set—which introduced surprise Farsi comprising 35.5% of samples—our system achieves F1 = 0.6039, above the competition mean (0.5465) and median (0.5798). We evaluated eleven approaches and document key negative results. Post evaluation, a six-model cross-regime ensemble improved dev F1 to 0.7585.</abstract>
<identifier type="citekey">singh-2026-gladiators</identifier>
<location>
<url>https://aclanthology.org/2026.smm4h-1.2/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>7</start>
<end>11</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Gladiators at #SMM4H–HeaRD 2026: Multi-Seed XLM-RoBERTa Ensemble with Focal Loss and Per-Language Threshold Optimization for Multilingual Adverse Drug Event Detection
%A Singh, Ankit Kumar
%Y Lopez-Garcia, Guillermo
%Y Gonzalez-Hernandez, Graciela
%S Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, United States
%@ 979-8-89176-432-3
%F singh-2026-gladiators
%X This paper describes the Gladiators system for Task 1 of the SMM4H 2026 shared task on binary classification of adverse drug event (ADE) mentions in multilingual social media posts. Our system fine-tunes three XLM-RoBERTa large models with different random seeds using focal loss (α=0.75, γ=2.0) and 3× positive oversampling, then averages their predicted probabilities and applies per-language threshold optimization. On the development set, our ensemble achieves a pooled binary F1 of 0.7505. On the official test set—which introduced surprise Farsi comprising 35.5% of samples—our system achieves F1 = 0.6039, above the competition mean (0.5465) and median (0.5798). We evaluated eleven approaches and document key negative results. Post evaluation, a six-model cross-regime ensemble improved dev F1 to 0.7585.
%U https://aclanthology.org/2026.smm4h-1.2/
%P 7-11
Markdown (Informal)
[Gladiators at #SMM4H–HeaRD 2026: Multi-Seed XLM-RoBERTa Ensemble with Focal Loss and Per-Language Threshold Optimization for Multilingual Adverse Drug Event Detection](https://aclanthology.org/2026.smm4h-1.2/) (Singh, SMM4H 2026)
ACL