@inproceedings{shields-menard-etal-2025-utsa,
    title = "{UTSA}-{NLP} at {A}rch{EHR}-{QA} 2025: Improving {EHR} Question Answering via Self-Consistency Prompting",
    author = "Shields-Menard, Sara and
      Reimers, Zach and
      Gardner, Joshua and
      Perry, David and
      Rios, Anthony",
    editor = "Soni, Sarvesh and
      Demner-Fushman, Dina",
    booktitle = "Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)",
    month = aug,
    year = "2025",
    address = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.bionlp-share.10/",
    doi = "10.18653/v1/2025.bionlp-share.10",
    pages = "81--90",
    ISBN = "979-8-89176-276-3",
    abstract = "We describe our system for the ArchEHR-QA Shared Task on answering clinical questions using electronic health records (EHRs). Our approach uses large language models in two steps: first, to find sentences in the EHR relevant to a clinician{'}s question, and second, to generate a short, citation-supported response based on those sentences. We use few-shot prompting, self-consistency, and thresholding to improve the sentence classification step to decide which sentences are essential. We compare several models and find that a smaller 8B model performs better than a larger 70B model for identifying relevant information. Our results show that accurate sentence selection is critical for generating high-quality responses and that self-consistency with thresholding helps make these decisions more reliable."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="shields-menard-etal-2025-utsa">
    <titleInfo>
      <title>UTSA-NLP at ArchEHR-QA 2025: Improving EHR Question Answering via Self-Consistency Prompting</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Sara</namePart>
      <namePart type="family">Shields-Menard</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zach</namePart>
      <namePart type="family">Reimers</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Joshua</namePart>
      <namePart type="family">Gardner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="family">Perry</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Anthony</namePart>
      <namePart type="family">Rios</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sarvesh</namePart>
        <namePart type="family">Soni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dina</namePart>
        <namePart type="family">Demner-Fushman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-276-3</identifier>
    </relatedItem>
    <abstract>We describe our system for the ArchEHR-QA Shared Task on answering clinical questions using electronic health records (EHRs). Our approach uses large language models in two steps: first, to find sentences in the EHR relevant to a clinician’s question, and second, to generate a short, citation-supported response based on those sentences. We use few-shot prompting, self-consistency, and thresholding to improve the sentence classification step to decide which sentences are essential. We compare several models and find that a smaller 8B model performs better than a larger 70B model for identifying relevant information. Our results show that accurate sentence selection is critical for generating high-quality responses and that self-consistency with thresholding helps make these decisions more reliable.</abstract>
    <identifier type="citekey">shields-menard-etal-2025-utsa</identifier>
    <identifier type="doi">10.18653/v1/2025.bionlp-share.10</identifier>
    <location>
      <url>https://aclanthology.org/2025.bionlp-share.10/</url>
    </location>
    <part>
      <date>2025-08</date>
      <extent unit="page">
        <start>81</start>
        <end>90</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T UTSA-NLP at ArchEHR-QA 2025: Improving EHR Question Answering via Self-Consistency Prompting
%A Shields-Menard, Sara
%A Reimers, Zach
%A Gardner, Joshua
%A Perry, David
%A Rios, Anthony
%Y Soni, Sarvesh
%Y Demner-Fushman, Dina
%S Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-276-3
%F shields-menard-etal-2025-utsa
%X We describe our system for the ArchEHR-QA Shared Task on answering clinical questions using electronic health records (EHRs). Our approach uses large language models in two steps: first, to find sentences in the EHR relevant to a clinician’s question, and second, to generate a short, citation-supported response based on those sentences. We use few-shot prompting, self-consistency, and thresholding to improve the sentence classification step to decide which sentences are essential. We compare several models and find that a smaller 8B model performs better than a larger 70B model for identifying relevant information. Our results show that accurate sentence selection is critical for generating high-quality responses and that self-consistency with thresholding helps make these decisions more reliable.
%R 10.18653/v1/2025.bionlp-share.10
%U https://aclanthology.org/2025.bionlp-share.10/
%U https://doi.org/10.18653/v1/2025.bionlp-share.10
%P 81-90

Markdown (Informal)

[UTSA-NLP at ArchEHR-QA 2025: Improving EHR Question Answering via Self-Consistency Prompting](https://aclanthology.org/2025.bionlp-share.10/) (Shields-Menard et al., BioNLP 2025)

ACL

Sara Shields-Menard, Zach Reimers, Joshua Gardner, David Perry, and Anthony Rios. 2025. UTSA-NLP at ArchEHR-QA 2025: Improving EHR Question Answering via Self-Consistency Prompting. In Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks), pages 81–90, Vienna, Austria. Association for Computational Linguistics.