% Conference-paper record for citation key leite-etal-2026-llm (PROPOR 2026, Vol. 2).
% Accented letters use LaTeX escapes wrapped as BibTeX special characters,
% so the entry also sorts and labels correctly under classic 8-bit BibTeX.
% The particle "de" of "de Almeida" is entered in "von Last, First" form so that
% abbreviating styles print "L. B. de Almeida" instead of "L. B. d. Almeida".
% The abstract is stored verbatim; standard styles ignore that field.
@inproceedings{leite-etal-2026-llm,
    title     = {{LLM}-Based Multi-Agent System with Retrieval-Augmented Generation for Medical Care Planning Generation in Sickle Cell Disease},
    author    = {Leite, Luana Bringel and
                 Pereira, David Eduardo and
                 Azevedo, Eyshila Buriti de Araujo and
                 Filho, Leonardo Mota Meira and
                 Ara{\'u}jo, Eliane Cristina and
                 Campelo, Cl{\'a}udio E. C. and
                 Marques, Taciana R. O. C. and
                 de Almeida, Let{\'i}cia B. and
                 Gomes, Herman Martins},
    editor    = {Souza, Marlo and
                 de-Dios-Flores, Iria and
                 Santos, Diana and
                 Freitas, Larissa and
                 Souza, Jackson Wilke da Cruz and
                 Ribeiro, Eug{\'e}nio},
    booktitle = {Proceedings of the 17th International Conference on Computational Processing of {P}ortuguese ({PROPOR} 2026) - Vol. 2},
    month     = apr,
    year      = {2026},
    address   = {Salvador, Brazil},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.propor-2.16/},
    pages     = {88--100},
    isbn      = {979-8-89176-387-6},
    abstract  = {Ensuring safety in clinical applications of large language models (LLMs) remains an unresolved challenge, particularly for high-risk and underrepresented conditions such as Sickle Cell Disease (SCD). Consequently, these models may exhibit limited reliability for SCD, including hallucinations and clinically unsafe outputs. This paper proposes an LLM-based Multi-Agent System (MAS) enhanced by Retrieval-Augmented Generation (RAG) to support the generation of medical care plans for SCD. The MAS decomposes clinical reasoning into specialized agents responsible for diagnosis, investigation, and treatment planning. Retrieval is framed not as a performance optimization, but as a safety control mechanism. Three RAG strategies, namely LLM-Guided Tree Retrieval, Metadata-Filtered Retrieval, and Semantic Similarity Retrieval, are evaluated alongside a baseline. Our experiments considered LLM-as-a-Judge evaluations and independent assessments by physicians. The results demonstrate high clinical quality, with safety scores exceeding 4 on a 5-point scale. While average performance was similar between RAG and baseline conditions, the Tree Retrieval strategy reduced the frequency of clinically unsafe outputs compared to conventional Semantic Retrieval, indicating fewer clinically unsafe outputs. These findings show evidence that average performance is insufficient to evaluate clinical AI systems, particularly in high-risk scenarios where retrieval serves as a safety control layer.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="leite-etal-2026-llm">
    <titleInfo>
      <title>LLM-Based Multi-Agent System with Retrieval-Augmented Generation for Medical Care Planning Generation in Sickle Cell Disease</title>
    </titleInfo>

    <!-- Authors, in the order they are listed. Each given-name token is its own namePart. -->
    <name type="personal">
      <namePart type="given">Luana</namePart>
      <namePart type="given">Bringel</namePart>
      <namePart type="family">Leite</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="given">Eduardo</namePart>
      <namePart type="family">Pereira</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eyshila</namePart>
      <namePart type="given">Buriti</namePart>
      <namePart type="given">de</namePart>
      <namePart type="given">Araujo</namePart>
      <namePart type="family">Azevedo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Leonardo</namePart>
      <namePart type="given">Mota</namePart>
      <namePart type="given">Meira</namePart>
      <namePart type="family">Filho</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eliane</namePart>
      <namePart type="given">Cristina</namePart>
      <namePart type="family">Araújo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Cláudio</namePart>
      <namePart type="given">E</namePart>
      <namePart type="given">C</namePart>
      <namePart type="family">Campelo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Taciana</namePart>
      <namePart type="given">R</namePart>
      <namePart type="given">O</namePart>
      <namePart type="given">C</namePart>
      <namePart type="family">Marques</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Letícia</namePart>
      <namePart type="given">B</namePart>
      <namePart type="given">de</namePart>
      <namePart type="family">Almeida</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Herman</namePart>
      <namePart type="given">Martins</namePart>
      <namePart type="family">Gomes</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 2</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marlo</namePart>
        <namePart type="family">Souza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Iria</namePart>
        <namePart type="family">de-Dios-Flores</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Diana</namePart>
        <namePart type="family">Santos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Larissa</namePart>
        <namePart type="family">Freitas</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jackson</namePart>
        <namePart type="given">Wilke</namePart>
        <namePart type="given">da</namePart>
        <namePart type="given">Cruz</namePart>
        <namePart type="family">Souza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eugénio</namePart>
        <namePart type="family">Ribeiro</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Salvador, Brazil</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-387-6</identifier>
    </relatedItem>

    <abstract>Ensuring safety in clinical applications of large language models (LLMs) remains an unresolved challenge, particularly for high-risk and underrepresented conditions such as Sickle Cell Disease (SCD). Consequently, these models may exhibit limited reliability for SCD, including hallucinations and clinically unsafe outputs. This paper proposes an LLM-based Multi-Agent System (MAS) enhanced by Retrieval-Augmented Generation (RAG) to support the generation of medical care plans for SCD. The MAS decomposes clinical reasoning into specialized agents responsible for diagnosis, investigation, and treatment planning. Retrieval is framed not as a performance optimization, but as a safety control mechanism. Three RAG strategies, namely LLM-Guided Tree Retrieval, Metadata-Filtered Retrieval, and Semantic Similarity Retrieval, are evaluated alongside a baseline. Our experiments considered LLM-as-a-Judge evaluations and independent assessments by physicians. The results demonstrate high clinical quality, with safety scores exceeding 4 on a 5-point scale. While average performance was similar between RAG and baseline conditions, the Tree Retrieval strategy reduced the frequency of clinically unsafe outputs compared to conventional Semantic Retrieval, indicating fewer clinically unsafe outputs. These findings show evidence that average performance is insufficient to evaluate clinical AI systems, particularly in high-risk scenarios where retrieval serves as a safety control layer.</abstract>
    <identifier type="citekey">leite-etal-2026-llm</identifier>
    <location>
      <url>https://aclanthology.org/2026.propor-2.16/</url>
    </location>

    <!-- Position of the paper inside the host volume. -->
    <part>
      <date>2026-04</date>
      <extent unit="page">
        <start>88</start>
        <end>100</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T LLM-Based Multi-Agent System with Retrieval-Augmented Generation for Medical Care Planning Generation in Sickle Cell Disease
%A Leite, Luana Bringel
%A Pereira, David Eduardo
%A Azevedo, Eyshila Buriti de Araujo
%A Filho, Leonardo Mota Meira
%A Araújo, Eliane Cristina
%A Campelo, Cláudio E. C.
%A Marques, Taciana R. O. C.
%A Almeida, Letícia B. de
%A Gomes, Herman Martins
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 2
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F leite-etal-2026-llm
%X Ensuring safety in clinical applications of large language models (LLMs) remains an unresolved challenge, particularly for high-risk and underrepresented conditions such as Sickle Cell Disease (SCD). Consequently, these models may exhibit limited reliability for SCD, including hallucinations and clinically unsafe outputs. This paper proposes an LLM-based Multi-Agent System (MAS) enhanced by Retrieval-Augmented Generation (RAG) to support the generation of medical care plans for SCD. The MAS decomposes clinical reasoning into specialized agents responsible for diagnosis, investigation, and treatment planning. Retrieval is framed not as a performance optimization, but as a safety control mechanism. Three RAG strategies, namely LLM-Guided Tree Retrieval, Metadata-Filtered Retrieval, and Semantic Similarity Retrieval, are evaluated alongside a baseline. Our experiments considered LLM-as-a-Judge evaluations and independent assessments by physicians. The results demonstrate high clinical quality, with safety scores exceeding 4 on a 5-point scale. While average performance was similar between RAG and baseline conditions, the Tree Retrieval strategy reduced the frequency of clinically unsafe outputs compared to conventional Semantic Retrieval, indicating fewer clinically unsafe outputs. These findings show evidence that average performance is insufficient to evaluate clinical AI systems, particularly in high-risk scenarios where retrieval serves as a safety control layer.
%U https://aclanthology.org/2026.propor-2.16/
%P 88-100
Markdown (Informal)
[LLM-Based Multi-Agent System with Retrieval-Augmented Generation for Medical Care Planning Generation in Sickle Cell Disease](https://aclanthology.org/2026.propor-2.16/) (Leite et al., PROPOR 2026)
ACL
- Luana Bringel Leite, David Eduardo Pereira, Eyshila Buriti de Araujo Azevedo, Leonardo Mota Meira Filho, Eliane Cristina Araújo, Cláudio E. C. Campelo, Taciana R. O. C. Marques, Letícia B. de Almeida, and Herman Martins Gomes. 2026. LLM-Based Multi-Agent System with Retrieval-Augmented Generation for Medical Care Planning Generation in Sickle Cell Disease. In Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 2, pages 88–100, Salvador, Brazil. Association for Computational Linguistics.