BibTeX
@inproceedings{hofenbitzer-etal-2025-germediq,
title = "{G}er{M}ed{IQ}: A Resource for Simulated and Synthesized Anamnesis Interview Responses in {G}erman",
author = {Hofenbitzer, Justin and
Sch{\"o}ning, Sebastian and
Belle, Sebastian and
Lammert, Jacqueline and
Modersohn, Luise and
Boeker, Martin and
Frassinelli, Diego},
editor = "Zhao, Jin and
Wang, Mingyang and
Liu, Zhu",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-srw.84/",
doi = "10.18653/v1/2025.acl-srw.84",
pages = "1064--1078",
ISBN = "979-8-89176-254-1",
abstract = "Due to strict privacy regulations, text corpora in non-English clinical contexts are scarce. Consequently, synthetic data generation using Large Language Models (LLMs) emerges as a promising strategy to address this data gap. To evaluate the ability of LLMs in generating synthetic data, we applied them to our novel German Medical Interview Questions Corpus (GerMedIQ), which consists of 4,524 unique, simulated question-response pairs in German. We augmented our corpus by prompting 18 different LLMs to generate responses to the same questions. Structural and semantic evaluations of the generated responses revealed that large-sized language models produced responses comparable to those provided by humans. Additionally, an LLM-as-a-judge study, combined with a human baseline experiment assessing response acceptability, demonstrated that human raters preferred the responses generated by Mistral (124B) over those produced by humans. Nonetheless, our findings indicate that using LLMs for data augmentation in non-English clinical contexts requires caution."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hofenbitzer-etal-2025-germediq">
<titleInfo>
<title>GerMedIQ: A Resource for Simulated and Synthesized Anamnesis Interview Responses in German</title>
</titleInfo>
<name type="personal">
<namePart type="given">Justin</namePart>
<namePart type="family">Hofenbitzer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Schöning</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Belle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacqueline</namePart>
<namePart type="family">Lammert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luise</namePart>
<namePart type="family">Modersohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Boeker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diego</namePart>
<namePart type="family">Frassinelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jin</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mingyang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhu</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-254-1</identifier>
</relatedItem>
<abstract>Due to strict privacy regulations, text corpora in non-English clinical contexts are scarce. Consequently, synthetic data generation using Large Language Models (LLMs) emerges as a promising strategy to address this data gap. To evaluate the ability of LLMs in generating synthetic data, we applied them to our novel German Medical Interview Questions Corpus (GerMedIQ), which consists of 4,524 unique, simulated question-response pairs in German. We augmented our corpus by prompting 18 different LLMs to generate responses to the same questions. Structural and semantic evaluations of the generated responses revealed that large-sized language models produced responses comparable to those provided by humans. Additionally, an LLM-as-a-judge study, combined with a human baseline experiment assessing response acceptability, demonstrated that human raters preferred the responses generated by Mistral (124B) over those produced by humans. Nonetheless, our findings indicate that using LLMs for data augmentation in non-English clinical contexts requires caution.</abstract>
<identifier type="citekey">hofenbitzer-etal-2025-germediq</identifier>
<identifier type="doi">10.18653/v1/2025.acl-srw.84</identifier>
<location>
<url>https://aclanthology.org/2025.acl-srw.84/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>1064</start>
<end>1078</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T GerMedIQ: A Resource for Simulated and Synthesized Anamnesis Interview Responses in German
%A Hofenbitzer, Justin
%A Schöning, Sebastian
%A Belle, Sebastian
%A Lammert, Jacqueline
%A Modersohn, Luise
%A Boeker, Martin
%A Frassinelli, Diego
%Y Zhao, Jin
%Y Wang, Mingyang
%Y Liu, Zhu
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-254-1
%F hofenbitzer-etal-2025-germediq
%X Due to strict privacy regulations, text corpora in non-English clinical contexts are scarce. Consequently, synthetic data generation using Large Language Models (LLMs) emerges as a promising strategy to address this data gap. To evaluate the ability of LLMs in generating synthetic data, we applied them to our novel German Medical Interview Questions Corpus (GerMedIQ), which consists of 4,524 unique, simulated question-response pairs in German. We augmented our corpus by prompting 18 different LLMs to generate responses to the same questions. Structural and semantic evaluations of the generated responses revealed that large-sized language models produced responses comparable to those provided by humans. Additionally, an LLM-as-a-judge study, combined with a human baseline experiment assessing response acceptability, demonstrated that human raters preferred the responses generated by Mistral (124B) over those produced by humans. Nonetheless, our findings indicate that using LLMs for data augmentation in non-English clinical contexts requires caution.
%R 10.18653/v1/2025.acl-srw.84
%U https://aclanthology.org/2025.acl-srw.84/
%U https://doi.org/10.18653/v1/2025.acl-srw.84
%P 1064-1078
Markdown (Informal)
[GerMedIQ: A Resource for Simulated and Synthesized Anamnesis Interview Responses in German](https://aclanthology.org/2025.acl-srw.84/) (Hofenbitzer et al., ACL 2025)
ACL
Justin Hofenbitzer, Sebastian Schöning, Sebastian Belle, Jacqueline Lammert, Luise Modersohn, Martin Boeker, and Diego Frassinelli. 2025. GerMedIQ: A Resource for Simulated and Synthesized Anamnesis Interview Responses in German. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop), pages 1064–1078, Vienna, Austria. Association for Computational Linguistics.
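The records above are meant to be machine-readable. As a minimal illustration (not part of the original Anthology export), the sketch below pulls a few fields out of the BibTeX entry with a naive regex: the field names (title, year, doi, pages) come from the record itself, while the parsing approach is a deliberately simplified assumption; a real pipeline would use a dedicated BibTeX parser such as bibtexparser.

```python
import re

# Abridged copy of the BibTeX record shown above (only the fields used below).
BIBTEX = r'''
@inproceedings{hofenbitzer-etal-2025-germediq,
    title = "{G}er{M}ed{IQ}: A Resource for Simulated and Synthesized Anamnesis Interview Responses in {G}erman",
    year = "2025",
    doi = "10.18653/v1/2025.acl-srw.84",
    pages = "1064--1078",
}
'''


def parse_bibtex_entry(entry: str) -> dict:
    """Naive extraction of key = "value" pairs from a single BibTeX entry.

    Handles only flat, quote-delimited fields; it ignores @string macros,
    brace-delimited values, and multiple entries.
    """
    fields = dict(re.findall(r'(\w+)\s*=\s*"([^"]*)"', entry))
    # Drop the capitalization-protection braces BibTeX uses, e.g. "{G}erman".
    return {key: value.replace("{", "").replace("}", "") for key, value in fields.items()}


if __name__ == "__main__":
    record = parse_bibtex_entry(BIBTEX)
    print(record["title"])
    print(f"{record['year']}, pages {record['pages']}, doi:{record['doi']}")
```

Running it prints the cleaned title followed by the year, page range, and DOI taken from the record above.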