@inproceedings{corbeil-etal-2026-medriskeval,
title = "{M}ed{R}isk{E}val: Medical Risk Evaluation Benchmark of Language Models, On the Importance of User Perspectives in Healthcare Settings",
author = "Corbeil, Jean-Philippe and
Kim, Minseon and
Griot, Maxime and
Agarwal, Sheela and
Sordoni, Alessandro and
Beaulieu, Francois and
Vozila, Paul",
editor = {Matusevych, Yevgen and
Eryi{\u{g}}it, G{\"u}l{\c{s}}en and
Aletras, Nikolaos},
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 5: Industry Track)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-industry.39/",
pages = "513--524",
ISBN = "979-8-89176-384-5",
abstract = "As the performance of large language models (LLMs) continues to advance, their adoption in the medical domain is increasing. However, most existing risk evaluations largely focused on general safety benchmarks. In the medical applications, LLMs may be used by a wide range of users, ranging from general users and patients to clinicians, with diverse levels of expertise and the model{'}s outputs can have a direct impact on human health which raises serious safety concerns. In this paper, we introduce MedRiskEval, a medical risk evaluation benchmark tailored to the medical domain. To fill the gap in previous benchmarks that only focused on the clinician perspective, we introduce a new patient-oriented dataset called PatientSafetyBench containing 466 samples across 5 critical risk categories. Leveraging our new benchmark alongside existing datasets, we evaluate a variety of open- and closed-source LLMs. To the best of our knowledge, this work establishes an initial foundation for safer deployment of LLMs in healthcare."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="corbeil-etal-2026-medriskeval">
<titleInfo>
<title>MedRiskEval: Medical Risk Evaluation Benchmark of Language Models, On the Importance of User Perspectives in Healthcare Settings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jean-Philippe</namePart>
<namePart type="family">Corbeil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minseon</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maxime</namePart>
<namePart type="family">Griot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sheela</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Sordoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francois</namePart>
<namePart type="family">Beaulieu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Vozila</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 5: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yevgen</namePart>
<namePart type="family">Matusevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gülşen</namePart>
<namePart type="family">Eryiğit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-384-5</identifier>
</relatedItem>
<abstract>As the performance of large language models (LLMs) continues to advance, their adoption in the medical domain is increasing. However, most existing risk evaluations largely focused on general safety benchmarks. In the medical applications, LLMs may be used by a wide range of users, ranging from general users and patients to clinicians, with diverse levels of expertise and the model’s outputs can have a direct impact on human health which raises serious safety concerns. In this paper, we introduce MedRiskEval, a medical risk evaluation benchmark tailored to the medical domain. To fill the gap in previous benchmarks that only focused on the clinician perspective, we introduce a new patient-oriented dataset called PatientSafetyBench containing 466 samples across 5 critical risk categories. Leveraging our new benchmark alongside existing datasets, we evaluate a variety of open- and closed-source LLMs. To the best of our knowledge, this work establishes an initial foundation for safer deployment of LLMs in healthcare.</abstract>
<identifier type="citekey">corbeil-etal-2026-medriskeval</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-industry.39/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>513</start>
<end>524</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MedRiskEval: Medical Risk Evaluation Benchmark of Language Models, On the Importance of User Perspectives in Healthcare Settings
%A Corbeil, Jean-Philippe
%A Kim, Minseon
%A Griot, Maxime
%A Agarwal, Sheela
%A Sordoni, Alessandro
%A Beaulieu, Francois
%A Vozila, Paul
%Y Matusevych, Yevgen
%Y Eryiğit, Gülşen
%Y Aletras, Nikolaos
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 5: Industry Track)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-384-5
%F corbeil-etal-2026-medriskeval
%X As the performance of large language models (LLMs) continues to advance, their adoption in the medical domain is increasing. However, most existing risk evaluations largely focused on general safety benchmarks. In the medical applications, LLMs may be used by a wide range of users, ranging from general users and patients to clinicians, with diverse levels of expertise and the model’s outputs can have a direct impact on human health which raises serious safety concerns. In this paper, we introduce MedRiskEval, a medical risk evaluation benchmark tailored to the medical domain. To fill the gap in previous benchmarks that only focused on the clinician perspective, we introduce a new patient-oriented dataset called PatientSafetyBench containing 466 samples across 5 critical risk categories. Leveraging our new benchmark alongside existing datasets, we evaluate a variety of open- and closed-source LLMs. To the best of our knowledge, this work establishes an initial foundation for safer deployment of LLMs in healthcare.
%U https://aclanthology.org/2026.eacl-industry.39/
%P 513-524
Markdown (Informal)
[MedRiskEval: Medical Risk Evaluation Benchmark of Language Models, On the Importance of User Perspectives in Healthcare Settings](https://aclanthology.org/2026.eacl-industry.39/) (Corbeil et al., EACL 2026)
ACL
- Jean-Philippe Corbeil, Minseon Kim, Maxime Griot, Sheela Agarwal, Alessandro Sordoni, Francois Beaulieu, and Paul Vozila. 2026. MedRiskEval: Medical Risk Evaluation Benchmark of Language Models, On the Importance of User Perspectives in Healthcare Settings. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 5: Industry Track), pages 513–524, Rabat, Morocco. Association for Computational Linguistics.