@inproceedings{colelough-etal-2025-overview,
title = "Overview of the {C}lin{IQL}ink 2025 Shared Task on Medical Question-Answering",
author = "Colelough, Brandon and
Bartels, Davis and
Demner-Fushman, Dina",
editor = "Demner-Fushman, Dina and
Ananiadou, Sophia and
Miwa, Makoto and
Tsujii, Junichi",
booktitle = "Proceedings of the 24th Workshop on Biomedical Language Processing",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.bionlp-1.32/",
doi = "10.18653/v1/2025.bionlp-1.32",
pages = "378--387",
ISBN = "979-8-89176-275-6",
abstract = "In this paper, we present an overview of CLINIQLINK, a shared task, collocated with the 24th BioNLP workshop at ACL 2025, designed to stress-test large language models (LLMs) on medically-oriented question answering aimed at the level of a General Practitioner. The challenge supplies 4 978 expert-verified, medical source-grounded question{--}answer pairs that cover seven formats - true/false, multiple choice, unordered list, short answer, short-inverse, multi-hop, and multi-hop-inverse. Participating systems, bundled in Docker or Apptainer images, are executed on the CodaBench platform or the University of Maryland{'}s Zaratan cluster. An automated harness (Task 1) scores closed-ended items by exact match and open-ended items with a three-tier embedding metric. A subsequent physician panel (Task 2) audits the top model responses."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="colelough-etal-2025-overview">
<titleInfo>
<title>Overview of the ClinIQLink 2025 Shared Task on Medical Question-Answering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Brandon</namePart>
<namePart type="family">Colelough</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Davis</namePart>
<namePart type="family">Bartels</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Workshop on Biomedical Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Makoto</namePart>
<namePart type="family">Miwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-275-6</identifier>
</relatedItem>
<abstract>In this paper, we present an overview of CLINIQLINK, a shared task, collocated with the 24th BioNLP workshop at ACL 2025, designed to stress-test large language models (LLMs) on medically-oriented question answering aimed at the level of a General Practitioner. The challenge supplies 4 978 expert-verified, medical source-grounded question–answer pairs that cover seven formats - true/false, multiple choice, unordered list, short answer, short-inverse, multi-hop, and multi-hop-inverse. Participating systems, bundled in Docker or Apptainer images, are executed on the CodaBench platform or the University of Maryland’s Zaratan cluster. An automated harness (Task 1) scores closed-ended items by exact match and open-ended items with a three-tier embedding metric. A subsequent physician panel (Task 2) audits the top model responses.</abstract>
<identifier type="citekey">colelough-etal-2025-overview</identifier>
<identifier type="doi">10.18653/v1/2025.bionlp-1.32</identifier>
<location>
<url>https://aclanthology.org/2025.bionlp-1.32/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>378</start>
<end>387</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Overview of the ClinIQLink 2025 Shared Task on Medical Question-Answering
%A Colelough, Brandon
%A Bartels, Davis
%A Demner-Fushman, Dina
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Miwa, Makoto
%Y Tsujii, Junichi
%S Proceedings of the 24th Workshop on Biomedical Language Processing
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-275-6
%F colelough-etal-2025-overview
%X In this paper, we present an overview of CLINIQLINK, a shared task, collocated with the 24th BioNLP workshop at ACL 2025, designed to stress-test large language models (LLMs) on medically-oriented question answering aimed at the level of a General Practitioner. The challenge supplies 4 978 expert-verified, medical source-grounded question–answer pairs that cover seven formats - true/false, multiple choice, unordered list, short answer, short-inverse, multi-hop, and multi-hop-inverse. Participating systems, bundled in Docker or Apptainer images, are executed on the CodaBench platform or the University of Maryland’s Zaratan cluster. An automated harness (Task 1) scores closed-ended items by exact match and open-ended items with a three-tier embedding metric. A subsequent physician panel (Task 2) audits the top model responses.
%R 10.18653/v1/2025.bionlp-1.32
%U https://aclanthology.org/2025.bionlp-1.32/
%U https://doi.org/10.18653/v1/2025.bionlp-1.32
%P 378-387
Markdown (Informal)
[Overview of the ClinIQLink 2025 Shared Task on Medical Question-Answering](https://aclanthology.org/2025.bionlp-1.32/) (Colelough et al., BioNLP 2025)
ACL