% Shared-task system paper (BIT.UA) from the SMM4H-HeaRD 2026 workshop proceedings.
% Acronyms in title/booktitle are brace-protected as whole words so that
% sentence-casing styles keep their capitalisation. The escaped hash sign stays
% in its own brace group, so the protected acronym group that follows it does
% not begin with a control sequence (BibTeX recases letters inside such groups).
@inproceedings{jonker-matos-2026-bit,
  title     = {{BIT.UA} at {\#}{SMM4H}--{HeaRD} 2026: Towards Multi-Class Multilingual Clinical Entity Recognition with Multi-Head {CRF} Ensembles},
  author    = {Jonker, Richard A. A. and
               Matos, S{\'e}rgio},
  editor    = {Lopez-Garcia, Guillermo and
               Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM4H}-{HeaRD} 2026) Workshop and Shared Tasks},
  month     = jul,
  year      = {2026},
  address   = {San Diego, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.smm4h-1.8/},
  pages     = {41--48},
  isbn      = {979-8-89176-432-3},
  abstract  = {This paper describes the BIT.UA system for the MultiClinNER shared task at {\#}SMM4H{--}HeaRD 2026, targeting multilingual clinical named entity recognition across seven languages for three entity types (Disease, Procedure, Symptom). We extend the Multi-Head CRF architecture, originally developed for multi-class NER on Spanish clinical text, to the multilingual setting. To enable joint multi-entity training despite per-entity text variations in the dataset, we develop an adaptive text consolidation pipeline that preserves over 94{\%} of annotations. Our central finding is that a single xlm-roberta-large model, trained jointly on all seven languages and three entity types, achieves competition rank 2 for five of seven languages, outperforming dedicated monolingual models by up to +6.94 F1 points, while requiring only a single set of weights. Ensembling multiple seeds of this model achieves rank 1 for those five languages, and combining it with monolingual models yields rank 1 for the remaining two. Code and models are publicly available at https://github.com/ieeta-pt/Multi-Head-CRF/tree/MultiClinNER and https://huggingface.co/collections/IEETA/multiclinner-models.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper identified by citekey jonker-matos-2026-bit. -->
  <mods ID="jonker-matos-2026-bit">
    <titleInfo>
      <title>BIT.UA at #SMM4H–HeaRD 2026: Towards Multi-Class Multilingual Clinical Entity Recognition with Multi-Head CRF Ensembles</title>
    </titleInfo>
    <!-- Paper authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Richard</namePart>
      <namePart type="given">A</namePart>
      <namePart type="given">A</namePart>
      <namePart type="family">Jonker</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sérgio</namePart>
      <namePart type="family">Matos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Guillermo</namePart>
        <namePart type="family">Lopez-Garcia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Graciela</namePart>
        <namePart type="family">Gonzalez-Hernandez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-432-3</identifier>
    </relatedItem>
    <abstract>This paper describes the BIT.UA system for the MultiClinNER shared task at #SMM4H–HeaRD 2026, targeting multilingual clinical named entity recognition across seven languages for three entity types (Disease, Procedure, Symptom). We extend the Multi-Head CRF architecture, originally developed for multi-class NER on Spanish clinical text, to the multilingual setting. To enable joint multi-entity training despite per-entity text variations in the dataset, we develop an adaptive text consolidation pipeline that preserves over 94% of annotations. Our central finding is that a single xlm-roberta-large model, trained jointly on all seven languages and three entity types, achieves competition rank 2 for five of seven languages, outperforming dedicated monolingual models by up to +6.94 F1 points, while requiring only a single set of weights. Ensembling multiple seeds of this model achieves rank 1 for those five languages, and combining it with monolingual models yields rank 1 for the remaining two. Code and models are publicly available at https://github.com/ieeta-pt/Multi-Head-CRF/tree/MultiClinNER and https://huggingface.co/collections/IEETA/multiclinner-models.</abstract>
    <identifier type="citekey">jonker-matos-2026-bit</identifier>
    <location>
      <url>https://aclanthology.org/2026.smm4h-1.8/</url>
    </location>
    <!-- Position of the paper within the host volume: issue date and page range. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>41</start>
        <end>48</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T BIT.UA at #SMM4H–HeaRD 2026: Towards Multi-Class Multilingual Clinical Entity Recognition with Multi-Head CRF Ensembles
%A Jonker, Richard A. A.
%A Matos, Sérgio
%Y Lopez-Garcia, Guillermo
%Y Gonzalez-Hernandez, Graciela
%S Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, United States
%@ 979-8-89176-432-3
%F jonker-matos-2026-bit
%X This paper describes the BIT.UA system for the MultiClinNER shared task at #SMM4H–HeaRD 2026, targeting multilingual clinical named entity recognition across seven languages for three entity types (Disease, Procedure, Symptom). We extend the Multi-Head CRF architecture, originally developed for multi-class NER on Spanish clinical text, to the multilingual setting. To enable joint multi-entity training despite per-entity text variations in the dataset, we develop an adaptive text consolidation pipeline that preserves over 94% of annotations. Our central finding is that a single xlm-roberta-large model, trained jointly on all seven languages and three entity types, achieves competition rank 2 for five of seven languages, outperforming dedicated monolingual models by up to +6.94 F1 points, while requiring only a single set of weights. Ensembling multiple seeds of this model achieves rank 1 for those five languages, and combining it with monolingual models yields rank 1 for the remaining two. Code and models are publicly available at https://github.com/ieeta-pt/Multi-Head-CRF/tree/MultiClinNER and https://huggingface.co/collections/IEETA/multiclinner-models.
%U https://aclanthology.org/2026.smm4h-1.8/
%P 41-48
Markdown (Informal)
[BIT.UA at #SMM4H–HeaRD 2026: Towards Multi-Class Multilingual Clinical Entity Recognition with Multi-Head CRF Ensembles](https://aclanthology.org/2026.smm4h-1.8/) (Jonker & Matos, SMM4H 2026)
ACL