BibTeX
@inproceedings{remaki-etal-2025-limics,
title = "{LIMICS} at {A}rch{EHR}-{QA} 2025: Prompting {LLM}s Beats Fine-Tuned Embeddings",
author = "Remaki, Adam and
Violle, Armand and
Natraj, Vikram and
Gu{\'e}vel, {\'E}tienne and
Redjdal, Akram",
editor = "Soni, Sarvesh and
Demner-Fushman, Dina",
booktitle = "Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.bionlp-share.18/",
doi = "10.18653/v1/2025.bionlp-share.18",
pages = "150--159",
ISBN = "979-8-89176-276-3",
abstract = "In this paper, we investigated two approaches to clinical question-answering based on patient-formulated questions, supported by their narratives and brief medical records. The first approach leverages zero- and few-shot prompt engineering techniques with GPT-based Large Language Models (LLMs), incorporating strategies such as prompt chaining and chain-of-thought reasoning to guide the models in generating answers. The second approach adopts a two-steps structure: first, a text-classification stage uses embedding-based models (e.g., BERT variants) to identify sentences within the medical record that are most relevant to the given question; then, we prompt an LLM to paraphrase them into an answer so that it is generated exclusively from these selected sentences. Our empirical results demonstrate that the first approach outperforms the classification-guided pipeline, achieving the highest score on the development set and the test set using prompt chaining. Code: github.com/armandviolle/BioNLP-2025"
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="remaki-etal-2025-limics">
<titleInfo>
<title>LIMICS at ArchEHR-QA 2025: Prompting LLMs Beats Fine-Tuned Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Remaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Armand</namePart>
<namePart type="family">Violle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vikram</namePart>
<namePart type="family">Natraj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Étienne</namePart>
<namePart type="family">Guével</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akram</namePart>
<namePart type="family">Redjdal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sarvesh</namePart>
<namePart type="family">Soni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-276-3</identifier>
</relatedItem>
<abstract>In this paper, we investigated two approaches to clinical question-answering based on patient-formulated questions, supported by their narratives and brief medical records. The first approach leverages zero- and few-shot prompt engineering techniques with GPT-based Large Language Models (LLMs), incorporating strategies such as prompt chaining and chain-of-thought reasoning to guide the models in generating answers. The second approach adopts a two-steps structure: first, a text-classification stage uses embedding-based models (e.g., BERT variants) to identify sentences within the medical record that are most relevant to the given question; then, we prompt an LLM to paraphrase them into an answer so that it is generated exclusively from these selected sentences. Our empirical results demonstrate that the first approach outperforms the classification-guided pipeline, achieving the highest score on the development set and the test set using prompt chaining. Code: github.com/armandviolle/BioNLP-2025</abstract>
<identifier type="citekey">remaki-etal-2025-limics</identifier>
<identifier type="doi">10.18653/v1/2025.bionlp-share.18</identifier>
<location>
<url>https://aclanthology.org/2025.bionlp-share.18/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>150</start>
<end>159</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T LIMICS at ArchEHR-QA 2025: Prompting LLMs Beats Fine-Tuned Embeddings
%A Remaki, Adam
%A Violle, Armand
%A Natraj, Vikram
%A Guével, Étienne
%A Redjdal, Akram
%Y Soni, Sarvesh
%Y Demner-Fushman, Dina
%S Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-276-3
%F remaki-etal-2025-limics
%X In this paper, we investigate two approaches to clinical question-answering based on patient-formulated questions, supported by their narratives and brief medical records. The first approach leverages zero- and few-shot prompt engineering techniques with GPT-based Large Language Models (LLMs), incorporating strategies such as prompt chaining and chain-of-thought reasoning to guide the models in generating answers. The second approach adopts a two-step structure: first, a text-classification stage uses embedding-based models (e.g., BERT variants) to identify the sentences within the medical record that are most relevant to the given question; then, we prompt an LLM to paraphrase them into an answer so that the answer is generated exclusively from these selected sentences. Our empirical results demonstrate that the first approach outperforms the classification-guided pipeline, achieving the highest scores on both the development and test sets using prompt chaining. Code: github.com/armandviolle/BioNLP-2025
%R 10.18653/v1/2025.bionlp-share.18
%U https://aclanthology.org/2025.bionlp-share.18/
%U https://doi.org/10.18653/v1/2025.bionlp-share.18
%P 150-159
Markdown (Informal)
[LIMICS at ArchEHR-QA 2025: Prompting LLMs Beats Fine-Tuned Embeddings](https://aclanthology.org/2025.bionlp-share.18/) (Remaki et al., BioNLP 2025)
ACL
Adam Remaki, Armand Violle, Vikram Natraj, Étienne Guével, and Akram Redjdal. 2025. LIMICS at ArchEHR-QA 2025: Prompting LLMs Beats Fine-Tuned Embeddings. In Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks), pages 150–159, Vienna, Austria. Association for Computational Linguistics.
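
For readers skimming the abstract, the second (classification-guided) pipeline it describes can be sketched in a few lines. The sketch below is a minimal, hypothetical illustration, not the paper's implementation: the paper fine-tunes embedding-based classifiers (BERT variants), whereas here an off-the-shelf sentence-transformers encoder with a cosine-similarity threshold stands in for the relevance classifier, and a generic OpenAI chat call stands in for the paraphrasing LLM. The model names, the threshold value, and the prompt wording are all illustrative assumptions.

```python
# Hypothetical sketch of the abstract's two-step pipeline:
# step 1 selects record sentences relevant to the patient question,
# step 2 prompts an LLM to paraphrase ONLY those sentences into an answer.
from openai import OpenAI
from sentence_transformers import SentenceTransformer, util

encoder = SentenceTransformer("all-MiniLM-L6-v2")  # stand-in for the paper's fine-tuned BERT variants
client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment


def select_relevant(question: str, sentences: list[str], threshold: float = 0.4) -> list[str]:
    """Step 1: keep record sentences whose embedding is close to the question's."""
    q_emb = encoder.encode(question, convert_to_tensor=True)
    s_emb = encoder.encode(sentences, convert_to_tensor=True)
    scores = util.cos_sim(q_emb, s_emb)[0]  # one cosine score per sentence
    return [s for s, score in zip(sentences, scores) if float(score) >= threshold]


def paraphrase_answer(question: str, evidence: list[str]) -> str:
    """Step 2: ask the LLM to answer using only the selected sentences."""
    prompt = (
        "Answer the patient's question using ONLY the evidence sentences below.\n"
        f"Question: {question}\n"
        "Evidence:\n" + "\n".join(f"- {s}" for s in evidence)
    )
    resp = client.chat.completions.create(
        model="gpt-4o-mini",  # illustrative model choice, not the paper's
        messages=[{"role": "user", "content": prompt}],
    )
    return resp.choices[0].message.content


if __name__ == "__main__":
    record = [
        "Patient admitted with acute chest pain.",
        "Troponin levels were elevated on arrival.",
        "Patient reports a family history of hypertension.",
    ]
    question = "Why was I kept in the hospital overnight?"
    evidence = select_relevant(question, record)
    print(paraphrase_answer(question, evidence))
```

The prompt-chaining approach that the paper reports as stronger would replace step 1 with one or more LLM calls (e.g., first extracting relevant sentences, then composing the answer), chaining the output of one prompt into the next rather than relying on a separate classifier.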