@inproceedings{zelina-novacek-2026-discovery,
title = "Discovery@{FI} at {\#}{SMM}4{H}{--}{H}ea{RD} 2026: Ensemble Character Classifier for Multilingual Clinical {NER}",
author = "Zelina, Petr and
Novacek, Vit",
editor = "Lopez-Garcia, Guillermo and
Gonzalez-Hernandez, Graciela",
booktitle = "Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM}4{H}-{H}ea{RD} 2026) Workshop and Shared Tasks",
month = jul,
year = "2026",
address = "San Diego, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.smm4h-1.28/",
pages = "173--176",
ISBN = "979-8-89176-432-3",
abstract = "We present a system for multilingual clinical named entity recognition (NER) submitted to the MultiClinNER subtask of MultiClinAI 2026, covering all seven languages and three entity classes (disease, symptom, procedure). Our approach trains one binary token classifier ensemble per entity class using cross-lingual fine-tuning of XLM-RoBERTa-large, with all languages handled jointly. We apply character-level ensembling over six models (two encoder variants $\times$ three cross-validation folds). This ensembling method provides more granular probability estimates than single-model classifiers, allowing for more flexible precision-recall trade-off tuning. The system achieves character-level F1 scores of 0.70{--}0.88 on the official test set."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zelina-novacek-2026-discovery">
<titleInfo>
<title>Discovery@FI at #SMM4H–HeaRD 2026: Ensemble Character Classifier for Multilingual Clinical NER</title>
</titleInfo>
<name type="personal">
<namePart type="given">Petr</namePart>
<namePart type="family">Zelina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vit</namePart>
<namePart type="family">Novacek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Guillermo</namePart>
<namePart type="family">Lopez-Garcia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graciela</namePart>
<namePart type="family">Gonzalez-Hernandez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-432-3</identifier>
</relatedItem>
<abstract>We present a system for multilingual clinical named entity recognition (NER) submitted to the MultiClinNER subtask of MultiClinAI 2026, covering all seven languages and three entity classes (disease, symptom, procedure). Our approach trains one binary token classifier ensemble per entity class using cross-lingual fine-tuning of XLM-RoBERTa-large, with all languages handled jointly. We apply character-level ensembling over six models (two encoder variants × three cross-validation folds). This ensembling method provides more granular probability estimates than single-model classifiers, allowing for more flexible precision-recall trade-off tuning. The system achieves character-level F1 scores of 0.70–0.88 on the official test set.</abstract>
<identifier type="citekey">zelina-novacek-2026-discovery</identifier>
<location>
<url>https://aclanthology.org/2026.smm4h-1.28/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>173</start>
<end>176</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Discovery@FI at #SMM4H–HeaRD 2026: Ensemble Character Classifier for Multilingual Clinical NER
%A Zelina, Petr
%A Novacek, Vit
%Y Lopez-Garcia, Guillermo
%Y Gonzalez-Hernandez, Graciela
%S Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, United States
%@ 979-8-89176-432-3
%F zelina-novacek-2026-discovery
%X We present a system for multilingual clinical named entity recognition (NER) submitted to the MultiClinNER subtask of MultiClinAI 2026, covering all seven languages and three entity classes (disease, symptom, procedure). Our approach trains one binary token classifier ensemble per entity class using cross-lingual fine-tuning of XLM-RoBERTa-large, with all languages handled jointly. We apply character-level ensembling over six models (two encoder variants × three cross-validation folds). This ensembling method provides more granular probability estimates than single-model classifiers, allowing for more flexible precision-recall trade-off tuning. The system achieves character-level F1 scores of 0.70–0.88 on the official test set.
%U https://aclanthology.org/2026.smm4h-1.28/
%P 173-176
Markdown (Informal)
[Discovery@FI at #SMM4H–HeaRD 2026: Ensemble Character Classifier for Multilingual Clinical NER](https://aclanthology.org/2026.smm4h-1.28/) (Zelina & Novacek, SMM4H 2026)
ACL