@inproceedings{kadusabe-etal-2025-dkitnlp,
title = "{DKITNLP} at {A}rch{EHR}-{QA} 2025: A Retrieval Augmented {LLM} Pipeline for Evidence-Based Patient Question Answering",
author = "Kadusabe, Provia and
Kaushik, Abhishek and
Lawless, Fiona",
editor = "Soni, Sarvesh and
Demner-Fushman, Dina",
booktitle = "Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.bionlp-share.20/",
doi = "10.18653/v1/2025.bionlp-share.20",
pages = "165--170",
ISBN = "979-8-89176-276-3",
abstract = "This paper describes our submission for the BioNLP ACL 2025 Shared task on grounded Question Answering (QA) from Electronic Health Records (EHRs). The task aims to automatically generate answers to patients' health related questions that are grounded in the evidence from their clinical notes. We propose a two stage retrieval pipeline to identify relevant sentences to guide response generation by a Large Language Model (LLM). Specifically, our approach uses a BioBERT based bi-encoder for initial retrieval, followed by a re-ranking step using a fine-tuned cross-encoder to enhance retrieval precision. The final set of selected sentences serve as an input to Mistral 7B model which generates answers through few-shot prompting. Our approach achieves an overall score of 31.6 on the test set, outperforming a substantially larger baseline model LLaMA 3.3 70B (30.7), which demonstrates the effectiveness of retrieval-augmented generation for grounded QA."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kadusabe-etal-2025-dkitnlp">
<titleInfo>
<title>DKITNLP at ArchEHR-QA 2025: A Retrieval Augmented LLM Pipeline for Evidence-Based Patient Question Answering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Provia</namePart>
<namePart type="family">Kadusabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhishek</namePart>
<namePart type="family">Kaushik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fiona</namePart>
<namePart type="family">Lawless</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sarvesh</namePart>
<namePart type="family">Soni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-276-3</identifier>
</relatedItem>
<abstract>This paper describes our submission for the BioNLP ACL 2025 Shared task on grounded Question Answering (QA) from Electronic Health Records (EHRs). The task aims to automatically generate answers to patients’ health related questions that are grounded in the evidence from their clinical notes. We propose a two stage retrieval pipeline to identify relevant sentences to guide response generation by a Large Language Model (LLM). Specifically, our approach uses a BioBERT based bi-encoder for initial retrieval, followed by a re-ranking step using a fine-tuned cross-encoder to enhance retrieval precision. The final set of selected sentences serve as an input to Mistral 7B model which generates answers through few-shot prompting. Our approach achieves an overall score of 31.6 on the test set, outperforming a substantially larger baseline model LLaMA 3.3 70B (30.7), which demonstrates the effectiveness of retrieval-augmented generation for grounded QA.</abstract>
<identifier type="citekey">kadusabe-etal-2025-dkitnlp</identifier>
<identifier type="doi">10.18653/v1/2025.bionlp-share.20</identifier>
<location>
<url>https://aclanthology.org/2025.bionlp-share.20/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>165</start>
<end>170</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DKITNLP at ArchEHR-QA 2025: A Retrieval Augmented LLM Pipeline for Evidence-Based Patient Question Answering
%A Kadusabe, Provia
%A Kaushik, Abhishek
%A Lawless, Fiona
%Y Soni, Sarvesh
%Y Demner-Fushman, Dina
%S Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-276-3
%F kadusabe-etal-2025-dkitnlp
%X This paper describes our submission for the BioNLP ACL 2025 Shared task on grounded Question Answering (QA) from Electronic Health Records (EHRs). The task aims to automatically generate answers to patients’ health related questions that are grounded in the evidence from their clinical notes. We propose a two stage retrieval pipeline to identify relevant sentences to guide response generation by a Large Language Model (LLM). Specifically, our approach uses a BioBERT based bi-encoder for initial retrieval, followed by a re-ranking step using a fine-tuned cross-encoder to enhance retrieval precision. The final set of selected sentences serve as an input to Mistral 7B model which generates answers through few-shot prompting. Our approach achieves an overall score of 31.6 on the test set, outperforming a substantially larger baseline model LLaMA 3.3 70B (30.7), which demonstrates the effectiveness of retrieval-augmented generation for grounded QA.
%R 10.18653/v1/2025.bionlp-share.20
%U https://aclanthology.org/2025.bionlp-share.20/
%U https://doi.org/10.18653/v1/2025.bionlp-share.20
%P 165-170
Markdown (Informal)
[DKITNLP at ArchEHR-QA 2025: A Retrieval Augmented LLM Pipeline for Evidence-Based Patient Question Answering](https://aclanthology.org/2025.bionlp-share.20/) (Kadusabe et al., BioNLP 2025)
ACL