@inproceedings{dada-etal-2025-medisumqa,
title = "{M}e{D}i{S}um{QA}: Patient-Oriented Question-Answer Generation from Discharge Letters",
author = "Dada, Amin and
Koras, Osman and
Bauer, Marie and
Butler, Amanda and
Smith, Kaleb and
Kleesiek, Jens and
Friedrich, Julian",
editor = "Ananiadou, Sophia and
Demner-Fushman, Dina and
Gupta, Deepak and
Thompson, Paul",
booktitle = "Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health)",
month = may,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.cl4health-1.10/",
doi = "10.18653/v1/2025.cl4health-1.10",
pages = "124--136",
ISBN = "979-8-89176-238-1",
abstract = "While increasing patients' access to medical documents improves medical care, this benefit is limited by varying health literacy levels and complex medical terminology. Large language models (LLMs) offer solutions by simplifying medical information. However, evaluating LLMs for safe and patient-friendly text generation is difficult due to the lack of standardized evaluation resources. To fill this gap, we developed MeDiSumQA. MeDiSumQA is a dataset created from MIMIC-IV discharge summaries through an automated pipeline combining LLM-based question-answer generation with manual quality checks. We use this dataset to evaluate various LLMs on patient-oriented question-answering. Our findings reveal that general-purpose LLMs frequently surpass biomedical-adapted models, while automated metrics correlate with human judgment. By releasing MeDiSumQA on PhysioNet, we aim to advance the development of LLMs to enhance patient understanding and ultimately improve care outcomes."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dada-etal-2025-medisumqa">
<titleInfo>
<title>MeDiSumQA: Patient-Oriented Question-Answer Generation from Discharge Letters</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amin</namePart>
<namePart type="family">Dada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Osman</namePart>
<namePart type="family">Koras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Bauer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanda</namePart>
<namePart type="family">Butler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kaleb</namePart>
<namePart type="family">Smith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jens</namePart>
<namePart type="family">Kleesiek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julian</namePart>
<namePart type="family">Friedrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deepak</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Thompson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-238-1</identifier>
</relatedItem>
<abstract>While increasing patients’ access to medical documents improves medical care, this benefit is limited by varying health literacy levels and complex medical terminology. Large language models (LLMs) offer solutions by simplifying medical information. However, evaluating LLMs for safe and patient-friendly text generation is difficult due to the lack of standardized evaluation resources. To fill this gap, we developed MeDiSumQA. MeDiSumQA is a dataset created from MIMIC-IV discharge summaries through an automated pipeline combining LLM-based question-answer generation with manual quality checks. We use this dataset to evaluate various LLMs on patient-oriented question-answering. Our findings reveal that general-purpose LLMs frequently surpass biomedical-adapted models, while automated metrics correlate with human judgment. By releasing MeDiSumQA on PhysioNet, we aim to advance the development of LLMs to enhance patient understanding and ultimately improve care outcomes.</abstract>
<identifier type="citekey">dada-etal-2025-medisumqa</identifier>
<identifier type="doi">10.18653/v1/2025.cl4health-1.10</identifier>
<location>
<url>https://aclanthology.org/2025.cl4health-1.10/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>124</start>
<end>136</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MeDiSumQA: Patient-Oriented Question-Answer Generation from Discharge Letters
%A Dada, Amin
%A Koras, Osman
%A Bauer, Marie
%A Butler, Amanda
%A Smith, Kaleb
%A Kleesiek, Jens
%A Friedrich, Julian
%Y Ananiadou, Sophia
%Y Demner-Fushman, Dina
%Y Gupta, Deepak
%Y Thompson, Paul
%S Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health)
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-238-1
%F dada-etal-2025-medisumqa
%X While increasing patients’ access to medical documents improves medical care, this benefit is limited by varying health literacy levels and complex medical terminology. Large language models (LLMs) offer solutions by simplifying medical information. However, evaluating LLMs for safe and patient-friendly text generation is difficult due to the lack of standardized evaluation resources. To fill this gap, we developed MeDiSumQA. MeDiSumQA is a dataset created from MIMIC-IV discharge summaries through an automated pipeline combining LLM-based question-answer generation with manual quality checks. We use this dataset to evaluate various LLMs on patient-oriented question-answering. Our findings reveal that general-purpose LLMs frequently surpass biomedical-adapted models, while automated metrics correlate with human judgment. By releasing MeDiSumQA on PhysioNet, we aim to advance the development of LLMs to enhance patient understanding and ultimately improve care outcomes.
%R 10.18653/v1/2025.cl4health-1.10
%U https://aclanthology.org/2025.cl4health-1.10/
%U https://doi.org/10.18653/v1/2025.cl4health-1.10
%P 124-136
Markdown (Informal)
[MeDiSumQA: Patient-Oriented Question-Answer Generation from Discharge Letters](https://aclanthology.org/2025.cl4health-1.10/) (Dada et al., CL4Health 2025)
ACL