% ACL Anthology record for the CogStack-KCL-UCL system paper at ArchEHR-QA 2025,
% published in the BioNLP 2025 shared-task proceedings.
% Case-sensitive names in the title are protected as whole words so that
% sentence-casing styles keep them intact without mid-word brace groups.
% NOTE(review): address holds "Vienna, Austria", presumably the workshop venue
% rather than the publisher's city; confirm before using a style that prints
% it as the publisher location.
@inproceedings{agarwal-etal-2025-cogstack,
    title     = {{CogStack}-{KCL}-{UCL} at {ArchEHR}-{QA} 2025: Investigating Hybrid {LLM} Approaches for Grounded Clinical Question Answering},
    author    = {Agarwal, Shubham and
                 Searle, Thomas and
                 Noor, Kawsar and
                 Dobson, Richard},
    editor    = {Soni, Sarvesh and
                 Demner-Fushman, Dina},
    booktitle = {Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)},
    month     = aug,
    year      = {2025},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.bionlp-share.16/},
    doi       = {10.18653/v1/2025.bionlp-share.16},
    pages     = {126--135},
    isbn      = {979-8-89176-276-3},
    abstract  = {We present our system for the ArchEHR shared task, which focuses on answering clinical and patient-facing questions grounded in real-world EHR data. Our core contribution is a 2-Stage prompting pipeline that separates evidence selection from answer generation while employing in-context learning strategies. Our experimentation leveraged the open-weight Gemma-v3 family of models, with our best submission using the Gemma-12B model securing 5th place overall on the unseen test set. Through systematic experimentation, we demonstrate the effectiveness of task decomposition in improving both factual accuracy and answer relevance in grounded clinical question answering.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier below. -->
  <mods ID="agarwal-etal-2025-cogstack">
    <titleInfo>
      <title>CogStack-KCL-UCL at ArchEHR-QA 2025: Investigating Hybrid LLM Approaches for Grounded Clinical Question Answering</title>
    </titleInfo>

    <!-- Paper authors (role: author). -->
    <name type="personal">
      <namePart type="given">Shubham</namePart>
      <namePart type="family">Agarwal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Thomas</namePart>
      <namePart type="family">Searle</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kawsar</namePart>
      <namePart type="family">Noor</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Richard</namePart>
      <namePart type="family">Dobson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2025-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume containing this paper. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)</title>
      </titleInfo>
      <!-- Proceedings editors (role: editor). -->
      <name type="personal">
        <namePart type="given">Sarvesh</namePart>
        <namePart type="family">Soni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dina</namePart>
        <namePart type="family">Demner-Fushman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-276-3</identifier>
    </relatedItem>

    <abstract>We present our system for the ArchEHR shared task, which focuses on answering clinical and patient-facing questions grounded in real-world EHR data. Our core contribution is a 2-Stage prompting pipeline that separates evidence selection from answer generation while employing in-context learning strategies. Our experimentation leveraged the open-weight Gemma-v3 family of models, with our best submission using the Gemma-12B model securing 5th place overall on the unseen test set. Through systematic experimentation, we demonstrate the effectiveness of task decomposition in improving both factual accuracy and answer relevance in grounded clinical question answering.</abstract>

    <!-- Identifiers and resolvable location for the paper itself. -->
    <identifier type="citekey">agarwal-etal-2025-cogstack</identifier>
    <identifier type="doi">10.18653/v1/2025.bionlp-share.16</identifier>
    <location>
      <url>https://aclanthology.org/2025.bionlp-share.16/</url>
    </location>

    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2025-08</date>
      <extent unit="page">
        <start>126</start>
        <end>135</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T CogStack-KCL-UCL at ArchEHR-QA 2025: Investigating Hybrid LLM Approaches for Grounded Clinical Question Answering
%A Agarwal, Shubham
%A Searle, Thomas
%A Noor, Kawsar
%A Dobson, Richard
%Y Soni, Sarvesh
%Y Demner-Fushman, Dina
%S Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-276-3
%F agarwal-etal-2025-cogstack
%X We present our system for the ArchEHR shared task, which focuses on answering clinical and patient-facing questions grounded in real-world EHR data. Our core contribution is a 2-Stage prompting pipeline that separates evidence selection from answer generation while employing in-context learning strategies. Our experimentation leveraged the open-weight Gemma-v3 family of models, with our best submission using the Gemma-12B model securing 5th place overall on the unseen test set. Through systematic experimentation, we demonstrate the effectiveness of task decomposition in improving both factual accuracy and answer relevance in grounded clinical question answering.
%R 10.18653/v1/2025.bionlp-share.16
%U https://aclanthology.org/2025.bionlp-share.16/
%U https://doi.org/10.18653/v1/2025.bionlp-share.16
%P 126-135
Markdown (Informal)
[CogStack-KCL-UCL at ArchEHR-QA 2025: Investigating Hybrid LLM Approaches for Grounded Clinical Question Answering](https://aclanthology.org/2025.bionlp-share.16/) (Agarwal et al., BioNLP 2025)
ACL