@inproceedings{le-etal-2025-lailab,
title = "{LAIL}ab at {A}rch{EHR}-{QA} 2025: Test-time scaling for evidence selection in grounded question answering from electronic health records",
author = "Le, Tuan Dung and
Duong, Thanh and
Haddadan, Shohreh and
Jazayeri, Behzad and
Manley, Brandon and
Thieu, Thanh",
editor = "Soni, Sarvesh and
Demner-Fushman, Dina",
booktitle = "Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.bionlp-share.9/",
doi = "10.18653/v1/2025.bionlp-share.9",
pages = "75--80",
ISBN = "979-8-89176-276-3",
abstract = "This paper presents our approach to the ArchEHR shared task on generating answers to real-world patient questions grounded in evidence from electronic health records (EHRs). We investigate the zero-shot capabilities of general-purpose, domain-agnostic large language models (LLMs) in two key aspects: identifying essential supporting evidence and producing concise, coherent answers. To this aim, we propose a two-stage pipeline: (1) evidence identification via test-time scaling (TTS) and (2) generating the final answer conditioned on selected evidences from the previous stage.Our approach leverages high-temperature sampling to generate multiple outputs during the evidence selection phase. This TTS-based approach effectively explore more potential evidences which results in significant improvement of the factuality score of the answers."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="le-etal-2025-lailab">
<titleInfo>
<title>LAILab at ArchEHR-QA 2025: Test-time scaling for evidence selection in grounded question answering from electronic health records</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tuan</namePart>
<namePart type="given">Dung</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thanh</namePart>
<namePart type="family">Duong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shohreh</namePart>
<namePart type="family">Haddadan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Behzad</namePart>
<namePart type="family">Jazayeri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brandon</namePart>
<namePart type="family">Manley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thanh</namePart>
<namePart type="family">Thieu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sarvesh</namePart>
<namePart type="family">Soni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-276-3</identifier>
</relatedItem>
<abstract>This paper presents our approach to the ArchEHR shared task on generating answers to real-world patient questions grounded in evidence from electronic health records (EHRs). We investigate the zero-shot capabilities of general-purpose, domain-agnostic large language models (LLMs) in two key aspects: identifying essential supporting evidence and producing concise, coherent answers. To this aim, we propose a two-stage pipeline: (1) evidence identification via test-time scaling (TTS) and (2) generating the final answer conditioned on selected evidences from the previous stage.Our approach leverages high-temperature sampling to generate multiple outputs during the evidence selection phase. This TTS-based approach effectively explore more potential evidences which results in significant improvement of the factuality score of the answers.</abstract>
<identifier type="citekey">le-etal-2025-lailab</identifier>
<identifier type="doi">10.18653/v1/2025.bionlp-share.9</identifier>
<location>
<url>https://aclanthology.org/2025.bionlp-share.9/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>75</start>
<end>80</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LAILab at ArchEHR-QA 2025: Test-time scaling for evidence selection in grounded question answering from electronic health records
%A Le, Tuan Dung
%A Duong, Thanh
%A Haddadan, Shohreh
%A Jazayeri, Behzad
%A Manley, Brandon
%A Thieu, Thanh
%Y Soni, Sarvesh
%Y Demner-Fushman, Dina
%S Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-276-3
%F le-etal-2025-lailab
%X This paper presents our approach to the ArchEHR shared task on generating answers to real-world patient questions grounded in evidence from electronic health records (EHRs). We investigate the zero-shot capabilities of general-purpose, domain-agnostic large language models (LLMs) in two key aspects: identifying essential supporting evidence and producing concise, coherent answers. To this aim, we propose a two-stage pipeline: (1) evidence identification via test-time scaling (TTS) and (2) generating the final answer conditioned on selected evidences from the previous stage.Our approach leverages high-temperature sampling to generate multiple outputs during the evidence selection phase. This TTS-based approach effectively explore more potential evidences which results in significant improvement of the factuality score of the answers.
%R 10.18653/v1/2025.bionlp-share.9
%U https://aclanthology.org/2025.bionlp-share.9/
%U https://doi.org/10.18653/v1/2025.bionlp-share.9
%P 75-80
Markdown (Informal)
[LAILab at ArchEHR-QA 2025: Test-time scaling for evidence selection in grounded question answering from electronic health records](https://aclanthology.org/2025.bionlp-share.9/) (Le et al., BioNLP 2025)
ACL