@inproceedings{buns-etal-2025-wispermed,
title = "{W}is{P}er{M}ed at {A}rch{EHR}-{QA} 2025: A Modular, Relevance-First Approach for Grounded Question Answering on Eletronic Health Records",
author = {B{\"u}ns, Jan-Henning and
Damm, Hendrik and
Pakull, Tabea and
Nensa, Felix and
Livingstone, Elisabeth},
editor = "Soni, Sarvesh and
Demner-Fushman, Dina",
booktitle = "Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.bionlp-share.5/",
doi = "10.18653/v1/2025.bionlp-share.5",
pages = "41--49",
ISBN = "979-8-89176-276-3",
abstract = "Automatically answering patient questions based on electronic health records (EHRs) requires systems that both identify relevant evidence and generate accurate, grounded responses. We present a three-part pipeline developed by WisPerMed for the ArchEHR-QA 2025 shared task. First, a fine-tuned BioClinicalBERT model classifies note sentences by their relevance using synonym-based and paraphrased data augmentation. Second, a constrained generation step uses DistilBART-MedSummary to produce faithful answers strictly limited to top-ranked evidence. Third, we align each answer sentence to its supporting evidence via BiomedBERT embeddings and ROUGE-based similarity scoring to ensure citation transparency. Our system achieved a 35.0{\%} overall score on the hidden test set, outperforming the organizer{'}s baseline by 4.3 percentage points. Gains in BERTScore (+44{\%}) and SARI (+119{\%}) highlight substantial improvements in semantic accuracy and relevance. This modular approach demonstrates that enforcing evidence-awareness and citation grounding enhances both answer quality and trustworthiness in clinical QA systems."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="buns-etal-2025-wispermed">
<titleInfo>
<title>WisPerMed at ArchEHR-QA 2025: A Modular, Relevance-First Approach for Grounded Question Answering on Eletronic Health Records</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jan-Henning</namePart>
<namePart type="family">Büns</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hendrik</namePart>
<namePart type="family">Damm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tabea</namePart>
<namePart type="family">Pakull</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felix</namePart>
<namePart type="family">Nensa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elisabeth</namePart>
<namePart type="family">Livingstone</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sarvesh</namePart>
<namePart type="family">Soni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-276-3</identifier>
</relatedItem>
<abstract>Automatically answering patient questions based on electronic health records (EHRs) requires systems that both identify relevant evidence and generate accurate, grounded responses. We present a three-part pipeline developed by WisPerMed for the ArchEHR-QA 2025 shared task. First, a fine-tuned BioClinicalBERT model classifies note sentences by their relevance using synonym-based and paraphrased data augmentation. Second, a constrained generation step uses DistilBART-MedSummary to produce faithful answers strictly limited to top-ranked evidence. Third, we align each answer sentence to its supporting evidence via BiomedBERT embeddings and ROUGE-based similarity scoring to ensure citation transparency. Our system achieved a 35.0% overall score on the hidden test set, outperforming the organizer’s baseline by 4.3 percentage points. Gains in BERTScore (+44%) and SARI (+119%) highlight substantial improvements in semantic accuracy and relevance. This modular approach demonstrates that enforcing evidence-awareness and citation grounding enhances both answer quality and trustworthiness in clinical QA systems.</abstract>
<identifier type="citekey">buns-etal-2025-wispermed</identifier>
<identifier type="doi">10.18653/v1/2025.bionlp-share.5</identifier>
<location>
<url>https://aclanthology.org/2025.bionlp-share.5/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>41</start>
<end>49</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T WisPerMed at ArchEHR-QA 2025: A Modular, Relevance-First Approach for Grounded Question Answering on Eletronic Health Records
%A Büns, Jan-Henning
%A Damm, Hendrik
%A Pakull, Tabea
%A Nensa, Felix
%A Livingstone, Elisabeth
%Y Soni, Sarvesh
%Y Demner-Fushman, Dina
%S Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-276-3
%F buns-etal-2025-wispermed
%X Automatically answering patient questions based on electronic health records (EHRs) requires systems that both identify relevant evidence and generate accurate, grounded responses. We present a three-part pipeline developed by WisPerMed for the ArchEHR-QA 2025 shared task. First, a fine-tuned BioClinicalBERT model classifies note sentences by their relevance using synonym-based and paraphrased data augmentation. Second, a constrained generation step uses DistilBART-MedSummary to produce faithful answers strictly limited to top-ranked evidence. Third, we align each answer sentence to its supporting evidence via BiomedBERT embeddings and ROUGE-based similarity scoring to ensure citation transparency. Our system achieved a 35.0% overall score on the hidden test set, outperforming the organizer’s baseline by 4.3 percentage points. Gains in BERTScore (+44%) and SARI (+119%) highlight substantial improvements in semantic accuracy and relevance. This modular approach demonstrates that enforcing evidence-awareness and citation grounding enhances both answer quality and trustworthiness in clinical QA systems.
%R 10.18653/v1/2025.bionlp-share.5
%U https://aclanthology.org/2025.bionlp-share.5/
%U https://doi.org/10.18653/v1/2025.bionlp-share.5
%P 41-49
Markdown (Informal)
[WisPerMed at ArchEHR-QA 2025: A Modular, Relevance-First Approach for Grounded Question Answering on Eletronic Health Records](https://aclanthology.org/2025.bionlp-share.5/) (Büns et al., BioNLP 2025)
ACL