% BibTeX record for the CL4Health 2025 workshop paper (ACL Anthology 2025.cl4health-1.15).
% Braces inside the title protect whole words (LLM, German) from style-driven lowercasing.
% The address field carries the location exactly as given in the original export.
@inproceedings{pakull-etal-2025-preliminary,
  title     = {Preliminary Evaluation of an Open-Source {LLM} for Lay Translation of {German} Clinical Documents},
  author    = {Pakull, Tabea and
               Dada, Amin and
               Damm, Hendrik and
               Fleischhauer, Anke and
               Benson, Sven and
               Bender, No{\"e}lle and
               Prasuhn, Nicola and
               Kaminski, Katharina and
               Friedrich, Christoph and
               Horn, Peter and
               Kleesiek, Jens and
               Schadendorf, Dirk and
               Pretzell, Ina},
  editor    = {Ananiadou, Sophia and
               Demner-Fushman, Dina and
               Gupta, Deepak and
               Thompson, Paul},
  booktitle = {Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health)},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.cl4health-1.15/},
  doi       = {10.18653/v1/2025.cl4health-1.15},
  pages     = {180--192},
  isbn      = {979-8-89176-238-1},
  abstract  = {Clinical documents are essential to patient care, but their complexity often makes them inaccessible to patients. Large Language Models (LLMs) are a promising solution to support the creation of lay translations of these documents, addressing the infeasibility of manually creating these translations in busy clinical settings. However, the integration of LLMs into medical practice in Germany is challenging due to data scarcity and privacy regulations. This work evaluates an open-source LLM for lay translation in this data-scarce environment using datasets of German synthetic clinical documents and real tumor board protocols. The evaluation framework used combines readability, semantic, and lexical measures with the G-Eval framework. Preliminary results show that zero-shot prompts significantly improve readability (e.g., FREde: 21.4 {\textrightarrow} 39.3) and few-shot prompts improve semantic and lexical fidelity. However, the results also reveal G-Eval{'}s limitations in distinguishing between intentional omissions and factual inaccuracies. These findings underscore the need for manual review in clinical applications to ensure both accessibility and accuracy in lay translations. Furthermore, the effectiveness of prompting highlights the need for future work to develop applications that use predefined prompts in the background to reduce clinician workload.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey pakull-etal-2025-preliminary:
     thirteen authors, a host proceedings volume with four editors, pages 180 to 192. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="pakull-etal-2025-preliminary">
    <titleInfo>
      <title>Preliminary Evaluation of an Open-Source LLM for Lay Translation of German Clinical Documents</title>
    </titleInfo>
    <!-- Authors -->
    <name type="personal">
      <namePart type="given">Tabea</namePart>
      <namePart type="family">Pakull</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Amin</namePart>
      <namePart type="family">Dada</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hendrik</namePart>
      <namePart type="family">Damm</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Anke</namePart>
      <namePart type="family">Fleischhauer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sven</namePart>
      <namePart type="family">Benson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Noëlle</namePart>
      <namePart type="family">Bender</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nicola</namePart>
      <namePart type="family">Prasuhn</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Katharina</namePart>
      <namePart type="family">Kaminski</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Christoph</namePart>
      <namePart type="family">Friedrich</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Peter</namePart>
      <namePart type="family">Horn</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jens</namePart>
      <namePart type="family">Kleesiek</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dirk</namePart>
      <namePart type="family">Schadendorf</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ina</namePart>
      <namePart type="family">Pretzell</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sophia</namePart>
        <namePart type="family">Ananiadou</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dina</namePart>
        <namePart type="family">Demner-Fushman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Deepak</namePart>
        <namePart type="family">Gupta</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Paul</namePart>
        <namePart type="family">Thompson</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Albuquerque, New Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-238-1</identifier>
    </relatedItem>
    <abstract>Clinical documents are essential to patient care, but their complexity often makes them inaccessible to patients. Large Language Models (LLMs) are a promising solution to support the creation of lay translations of these documents, addressing the infeasibility of manually creating these translations in busy clinical settings. However, the integration of LLMs into medical practice in Germany is challenging due to data scarcity and privacy regulations. This work evaluates an open-source LLM for lay translation in this data-scarce environment using datasets of German synthetic clinical documents and real tumor board protocols. The evaluation framework used combines readability, semantic, and lexical measures with the G-Eval framework. Preliminary results show that zero-shot prompts significantly improve readability (e.g., FREde: 21.4 → 39.3) and few-shot prompts improve semantic and lexical fidelity. However, the results also reveal G-Eval’s limitations in distinguishing between intentional omissions and factual inaccuracies. These findings underscore the need for manual review in clinical applications to ensure both accessibility and accuracy in lay translations. Furthermore, the effectiveness of prompting highlights the need for future work to develop applications that use predefined prompts in the background to reduce clinician workload.</abstract>
    <!-- Identifiers and location of this paper -->
    <identifier type="citekey">pakull-etal-2025-preliminary</identifier>
    <identifier type="doi">10.18653/v1/2025.cl4health-1.15</identifier>
    <location>
      <url>https://aclanthology.org/2025.cl4health-1.15/</url>
    </location>
    <!-- Date and page extent recorded for this part -->
    <part>
      <date>2025-05</date>
      <extent unit="page">
        <start>180</start>
        <end>192</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Preliminary Evaluation of an Open-Source LLM for Lay Translation of German Clinical Documents
%A Pakull, Tabea
%A Dada, Amin
%A Damm, Hendrik
%A Fleischhauer, Anke
%A Benson, Sven
%A Bender, Noëlle
%A Prasuhn, Nicola
%A Kaminski, Katharina
%A Friedrich, Christoph
%A Horn, Peter
%A Kleesiek, Jens
%A Schadendorf, Dirk
%A Pretzell, Ina
%Y Ananiadou, Sophia
%Y Demner-Fushman, Dina
%Y Gupta, Deepak
%Y Thompson, Paul
%S Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health)
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-238-1
%F pakull-etal-2025-preliminary
%X Clinical documents are essential to patient care, but their complexity often makes them inaccessible to patients. Large Language Models (LLMs) are a promising solution to support the creation of lay translations of these documents, addressing the infeasibility of manually creating these translations in busy clinical settings. However, the integration of LLMs into medical practice in Germany is challenging due to data scarcity and privacy regulations. This work evaluates an open-source LLM for lay translation in this data-scarce environment using datasets of German synthetic clinical documents and real tumor board protocols. The evaluation framework used combines readability, semantic, and lexical measures with the G-Eval framework. Preliminary results show that zero-shot prompts significantly improve readability (e.g., FREde: 21.4 → 39.3) and few-shot prompts improve semantic and lexical fidelity. However, the results also reveal G-Eval’s limitations in distinguishing between intentional omissions and factual inaccuracies. These findings underscore the need for manual review in clinical applications to ensure both accessibility and accuracy in lay translations. Furthermore, the effectiveness of prompting highlights the need for future work to develop applications that use predefined prompts in the background to reduce clinician workload.
%R 10.18653/v1/2025.cl4health-1.15
%U https://aclanthology.org/2025.cl4health-1.15/
%U https://doi.org/10.18653/v1/2025.cl4health-1.15
%P 180-192
Markdown (Informal)
[Preliminary Evaluation of an Open-Source LLM for Lay Translation of German Clinical Documents](https://aclanthology.org/2025.cl4health-1.15/) (Pakull et al., CL4Health 2025)
ACL
- Tabea Pakull, Amin Dada, Hendrik Damm, Anke Fleischhauer, Sven Benson, Noëlle Bender, Nicola Prasuhn, Katharina Kaminski, Christoph Friedrich, Peter Horn, Jens Kleesiek, Dirk Schadendorf, and Ina Pretzell. 2025. Preliminary Evaluation of an Open-Source LLM for Lay Translation of German Clinical Documents. In Proceedings of the Second Workshop on Patient-Oriented Language Processing (CL4Health), pages 180–192, Albuquerque, New Mexico. Association for Computational Linguistics.