@comment{
  Overview paper of the MultiClinAI shared task (SMM4H-HeaRD 2026 workshop).
  Case-sensitive names in title and booktitle are brace-protected as whole
  words so that sentence-casing bibliography styles keep their capitalization.
}
@inproceedings{donoso-etal-2026-multiclinai,
  title     = {The {MultiClinAI} Shared Task on Multilingual Clinical Corpus Construction and Concept Extraction: Systems, Evaluation, and Datasets},
  author    = {Donoso, Fernando Gallego and
               Lima-Lopez, Salvador and
               Rosell, Judith and
               Farr{\'e}-Maduel, Eul{\`a}lia and
               Krallinger, Martin},
  editor    = {Lopez-Garcia, Guillermo and
               Gonzalez-Hernandez, Graciela},
  booktitle = {Proceedings of the 11th Social Media Mining for Health Research and Applications ({SMM4H-HeaRD} 2026) Workshop and Shared Tasks},
  month     = jul,
  year      = {2026},
  address   = {San Diego, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.smm4h-1.49/},
  pages     = {309--331},
  isbn      = {979-8-89176-432-3},
  abstract  = {We present an overview of the MultiClinAI shared task, which focuses on multilingual clinical entity extraction and automatic corpus generation through annotation projection. It addresses two key challenges in clinical natural language processing (NLP): (i) developing comparable multilingual named entity recognition (NER) systems and (ii) automatically constructing multilingual clinical corpora through annotation projection. The MultiClinAI task provides a unified benchmark for evaluating multilingual and cross-lingual clinical NLP approaches that cover diseases, symptoms, and procedures in Spanish, English, Dutch, Italian, Romanian, Swedish, and Czech. A total of 21 teams from 13 countries participated, submitting 531 runs across the different subtasks. The top runs obtained very competitive results, close to human expert annotation quality. The results highlight both the challenges and opportunities of multilingual clinical information extraction. All resources, including a corpus of over 738,201 manually revised entity mentions across seven languages, are publicly available on Zenodo at: \url{https://zenodo.org/records/19334278}.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for citekey donoso-etal-2026-multiclinai: paper-level metadata,
       with the containing proceedings described in the relatedItem of type "host". -->
  <mods ID="donoso-etal-2026-multiclinai">
    <titleInfo>
      <title>The MultiClinAI Shared Task on Multilingual Clinical Corpus Construction and Concept Extraction: Systems, Evaluation, and Datasets</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Fernando</namePart>
      <namePart type="given">Gallego</namePart>
      <namePart type="family">Donoso</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Salvador</namePart>
      <namePart type="family">Lima-Lopez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Judith</namePart>
      <namePart type="family">Rosell</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eulàlia</namePart>
      <namePart type="family">Farré-Maduel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Martin</namePart>
      <namePart type="family">Krallinger</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Guillermo</namePart>
        <namePart type="family">Lopez-Garcia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Graciela</namePart>
        <namePart type="family">Gonzalez-Hernandez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-432-3</identifier>
    </relatedItem>
    <abstract>We present an overview of the MultiClinAI shared task, which focuses on multilingual clinical entity extraction and automatic corpus generation through annotation projection. It addresses two key challenges in clinical natural language processing (NLP): (i) developing comparable multilingual named entity recognition (NER) systems and (ii) automatically constructing multilingual clinical corpora through annotation projection. The MultiClinAI task provides a unified benchmark for evaluating multilingual and cross-lingual clinical NLP approaches that cover diseases, symptoms, and procedures in Spanish, English, Dutch, Italian, Romanian, Swedish, and Czech. A total of 21 teams from 13 countries participated, submitting 531 runs across the different subtasks. The top runs obtained very competitive results, close to human expert annotation quality. The results highlight both the challenges and opportunities of multilingual clinical information extraction. All resources, including a corpus of over 738,201 manually revised entity mentions across seven languages, are publicly available on Zenodo at: https://zenodo.org/records/19334278.</abstract>
    <identifier type="citekey">donoso-etal-2026-multiclinai</identifier>
    <location>
      <url>https://aclanthology.org/2026.smm4h-1.49/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>309</start>
        <end>331</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The MultiClinAI Shared Task on Multilingual Clinical Corpus Construction and Concept Extraction: Systems, Evaluation, and Datasets
%A Donoso, Fernando Gallego
%A Lima-Lopez, Salvador
%A Rosell, Judith
%A Farré-Maduel, Eulàlia
%A Krallinger, Martin
%Y Lopez-Garcia, Guillermo
%Y Gonzalez-Hernandez, Graciela
%S Proceedings of the 11th Social Media Mining for Health Research and Applications (SMM4H-HeaRD 2026) Workshop and Shared Tasks
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, United States
%@ 979-8-89176-432-3
%F donoso-etal-2026-multiclinai
%X We present an overview of the MultiClinAI shared task, which focuses on multilingual clinical entity extraction and automatic corpus generation through annotation projection. It addresses two key challenges in clinical natural language processing (NLP): (i) developing comparable multilingual named entity recognition (NER) systems and (ii) automatically constructing multilingual clinical corpora through annotation projection. The MultiClinAI task provides a unified benchmark for evaluating multilingual and cross-lingual clinical NLP approaches that cover diseases, symptoms, and procedures in Spanish, English, Dutch, Italian, Romanian, Swedish, and Czech. A total of 21 teams from 13 countries participated, submitting 531 runs across the different subtasks. The top runs obtained very competitive results, close to human expert annotation quality. The results highlight both the challenges and opportunities of multilingual clinical information extraction. All resources, including a corpus of over 738,201 manually revised entity mentions across seven languages, are publicly available on Zenodo at: https://zenodo.org/records/19334278.
%U https://aclanthology.org/2026.smm4h-1.49/
%P 309-331
Markdown (Informal)
[The MultiClinAI Shared Task on Multilingual Clinical Corpus Construction and Concept Extraction: Systems, Evaluation, and Datasets](https://aclanthology.org/2026.smm4h-1.49/) (Donoso et al., SMM4H 2026)
ACL