% Conference paper record for NormaTex-MapSNOMED (PROPOR 2026, Vol. 1).
% Case-sensitive words in title/booktitle are brace-protected as whole words
% so sentence-casing styles keep them intact without breaking kerning.
@inproceedings{araujo-etal-2026-normatex,
  title     = {{NormaTex-MapSNOMED}: Bridging the Gap Between {Brazilian} {Portuguese} Clinical Narratives and {SNOMED} {CT}},
  author    = {Araujo, Isabela and
               Moro, Claudia and
               Martinez, Layslla},
  editor    = {Souza, Marlo and
               de-Dios-Flores, Iria and
               Santos, Diana and
               Freitas, Larissa and
               Souza, Jackson Wilke da Cruz and
               Ribeiro, Eug{\'e}nio},
  booktitle = {Proceedings of the 17th International Conference on Computational Processing of {Portuguese} ({PROPOR} 2026) - Vol. 1},
  month     = apr,
  year      = {2026},
  address   = {Salvador, Brazil},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.propor-1.115/},
  pages     = {1085--1091},
  isbn      = {979-8-89176-387-6},
  abstract  = {Clinical narratives written in free text contain valuable information for patient care. However, their unstructured nature and linguistic variability pose significant challenges for automatic processing and interoperability. In particular, mapping clinical terms to standardized terminologies such as SNOMED Clinical Terms (SNOMED CT) remains difficult for languages other than English, including Brazilian Portuguese. This paper presents NormaTex-MapSNOMED, a proposed component of the NormaTex framework that focuses on mapping clinical terms to predefined categories aligned with SNOMED CT. Given previously extracted terms, the method leverages large language models (LLMs) guided by a structured prompt to assign terms to target categories. Experiments were conducted on Portuguese-language clinical narratives and evaluated using three complementary strategies: lexical similarity based on Levenshtein distance, contextual similarity using a BERT-based model, and semantic validation using LLMs. The results show that LLM-based evaluation consistently outperforms lexical and contextual baselines across different models, with higher precision observed for disease-related terms compared to symptom-related expressions. These findings indicate that LLMs are a promising approach for semantic mapping of clinical terms in Brazilian Portuguese and can support clinical term normalization and interoperability with standardized terminologies.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for citekey araujo-etal-2026-normatex -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="araujo-etal-2026-normatex">
    <titleInfo>
      <title>NormaTex-MapSNOMED: Bridging the Gap Between Brazilian Portuguese Clinical Narratives and SNOMED CT</title>
    </titleInfo>
    <!-- Paper authors, in byline order -->
    <name type="personal">
      <namePart type="given">Isabela</namePart>
      <namePart type="family">Araujo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Claudia</namePart>
      <namePart type="family">Moro</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Layslla</namePart>
      <namePart type="family">Martinez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marlo</namePart>
        <namePart type="family">Souza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Iria</namePart>
        <namePart type="family">de-Dios-Flores</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Diana</namePart>
        <namePart type="family">Santos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Larissa</namePart>
        <namePart type="family">Freitas</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jackson</namePart>
        <namePart type="given">Wilke</namePart>
        <namePart type="given">da</namePart>
        <namePart type="given">Cruz</namePart>
        <namePart type="family">Souza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eugénio</namePart>
        <namePart type="family">Ribeiro</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Salvador, Brazil</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-387-6</identifier>
    </relatedItem>
    <abstract>Clinical narratives written in free text contain valuable information for patient care. However, their unstructured nature and linguistic variability pose significant challenges for automatic processing and interoperability. In particular, mapping clinical terms to standardized terminologies such as SNOMED Clinical Terms (SNOMED CT) remains difficult for languages other than English, including Brazilian Portuguese. This paper presents NormaTex-MapSNOMED, a proposed component of the NormaTex framework that focuses on mapping clinical terms to predefined categories aligned with SNOMED CT. Given previously extracted terms, the method leverages large language models (LLMs) guided by a structured prompt to assign terms to target categories. Experiments were conducted on Portuguese-language clinical narratives and evaluated using three complementary strategies: lexical similarity based on Levenshtein distance, contextual similarity using a BERT-based model, and semantic validation using LLMs. The results show that LLM-based evaluation consistently outperforms lexical and contextual baselines across different models, with higher precision observed for disease-related terms compared to symptom-related expressions. These findings indicate that LLMs are a promising approach for semantic mapping of clinical terms in Brazilian Portuguese and can support clinical term normalization and interoperability with standardized terminologies.</abstract>
    <identifier type="citekey">araujo-etal-2026-normatex</identifier>
    <location>
      <url>https://aclanthology.org/2026.propor-1.115/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2026-04</date>
      <extent unit="page">
        <start>1085</start>
        <end>1091</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T NormaTex-MapSNOMED: Bridging the Gap Between Brazilian Portuguese Clinical Narratives and SNOMED CT
%A Araujo, Isabela
%A Moro, Claudia
%A Martinez, Layslla
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F araujo-etal-2026-normatex
%X Clinical narratives written in free text contain valuable information for patient care. However, their unstructured nature and linguistic variability pose significant challenges for automatic processing and interoperability. In particular, mapping clinical terms to standardized terminologies such as SNOMED Clinical Terms (SNOMED CT) remains difficult for languages other than English, including Brazilian Portuguese. This paper presents NormaTex-MapSNOMED, a proposed component of the NormaTex framework that focuses on mapping clinical terms to predefined categories aligned with SNOMED CT. Given previously extracted terms, the method leverages large language models (LLMs) guided by a structured prompt to assign terms to target categories. Experiments were conducted on Portuguese-language clinical narratives and evaluated using three complementary strategies: lexical similarity based on Levenshtein distance, contextual similarity using a BERT-based model, and semantic validation using LLMs. The results show that LLM-based evaluation consistently outperforms lexical and contextual baselines across different models, with higher precision observed for disease-related terms compared to symptom-related expressions. These findings indicate that LLMs are a promising approach for semantic mapping of clinical terms in Brazilian Portuguese and can support clinical term normalization and interoperability with standardized terminologies.
%U https://aclanthology.org/2026.propor-1.115/
%P 1085-1091
Markdown (Informal)
[NormaTex-MapSNOMED: Bridging the Gap Between Brazilian Portuguese Clinical Narratives and SNOMED CT](https://aclanthology.org/2026.propor-1.115/) (Araujo et al., PROPOR 2026)
ACL