@inproceedings{jones-etal-2026-comparative,
title = "A Comparative Analysis of In-Context Learning and Fine-Tuning for Biomedical Information Retrieval and Sentence Extraction Using Research Domain Criteria",
author = "Jones, Athlene and
Lieu, Khanh and
Kahanda, Indika",
editor = "Demner-Fushman, Dina and
Ananiadou, Sophia and
Roberts, Kirk and
Tsujii, Junichi",
booktitle = "{B}io{NLP} 2026",
month = jul,
year = "2026",
address = "San Diego, California",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.bionlp-1.51/",
pages = "644--655",
ISBN = "979-8-89176-434-7",
abstract = "Research Domain Criteria (RDoC) is a National Institute of Mental Health framework for studying mental disorders by integrating information across genetics, circuits, and behavior. Manually curating biomedical abstracts relevant to RDoC is a significant challenge due to semantically overlapping construct definitions (e.g., ``Acute Threat,'' ``Potential Threat,'' and ``Sustained Threat'') and the exponential growth of biomedical literature. This study compares two modeling strategies, domain-adapted fine-tuning and in-context prompting, across two RDoC-related subtasks from the official BioNLP-OST 2019 RDoC shared task. For Task 1, unlabeled PubMed abstracts are retrieved and ranked by relevance to eight of the RDoC constructs. We compare a TF-IDF baseline against ModernBERT and Llama (zero-shot and five-shot) using Mean Average Precision (MAP). For Task 2, the objective is to identify the single most relevant sentence from an abstract for a given construct, evaluated using per-construct accuracy. The fine-tuning track performs end-to-end fine-tuning of BioBERT, PubMedBERT, ModernBERT, and RoBERTa using a cross-encoder input format and per-construct grid search. These are compared against the in-context learning of several open-source language models. Both our approaches are competitive against the best-performing team{'}s score from the BioNLP-OST 2019 RDoC shared task. Taken together, these findings suggest that five-shot prompted LLMs and domain-adapted fine-tuned transformers are viable tools for semi-automating the expert annotation in RDoC curation."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jones-etal-2026-comparative">
<titleInfo>
<title>A Comparative Analysis of In-Context Learning and Fine-Tuning for Biomedical Information Retrieval and Sentence Extraction Using Research Domain Criteria</title>
</titleInfo>
<name type="personal">
<namePart type="given">Athlene</namePart>
<namePart type="family">Jones</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khanh</namePart>
<namePart type="family">Lieu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Indika</namePart>
<namePart type="family">Kahanda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>BioNLP 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-434-7</identifier>
</relatedItem>
<abstract>Research Domain Criteria (RDoC) is a National Institute of Mental Health framework for studying mental disorders by integrating information across genetics, circuits, and behavior. Manually curating biomedical abstracts relevant to RDoC is a significant challenge due to semantically overlapping construct definitions (e.g., “Acute Threat,” “Potential Threat,” and “Sustained Threat”) and the exponential growth of biomedical literature. This study compares two modeling strategies, domain-adapted fine-tuning and in-context prompting, across two RDoC-related subtasks from the official BioNLP-OST 2019 RDoC shared task. For Task 1, unlabeled PubMed abstracts are retrieved and ranked by relevance to eight of the RDoC constructs. We compare a TF-IDF baseline against ModernBERT and Llama (zero-shot and five-shot) using Mean Average Precision (MAP). For Task 2, the objective is to identify the single most relevant sentence from an abstract for a given construct, evaluated using per-construct accuracy. The fine-tuning track performs end-to-end fine-tuning of BioBERT, PubMedBERT, ModernBERT, and RoBERTa using a cross-encoder input format and per-construct grid search. These are compared against the in-context learning of several open-source language models. Both our approaches are competitive against the best-performing team’s score from the BioNLP-OST 2019 RDoC shared task. Taken together, these findings suggest that five-shot prompted LLMs and domain-adapted fine-tuned transformers are viable tools for semi-automating the expert annotation in RDoC curation.</abstract>
<identifier type="citekey">jones-etal-2026-comparative</identifier>
<location>
<url>https://aclanthology.org/2026.bionlp-1.51/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>644</start>
<end>655</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Comparative Analysis of In-Context Learning and Fine-Tuning for Biomedical Information Retrieval and Sentence Extraction Using Research Domain Criteria
%A Jones, Athlene
%A Lieu, Khanh
%A Kahanda, Indika
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Roberts, Kirk
%Y Tsujii, Junichi
%S BioNLP 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California
%@ 979-8-89176-434-7
%F jones-etal-2026-comparative
%X Research Domain Criteria (RDoC) is a National Institute of Mental Health framework for studying mental disorders by integrating information across genetics, circuits, and behavior. Manually curating biomedical abstracts relevant to RDoC is a significant challenge due to semantically overlapping construct definitions (e.g., “Acute Threat,” “Potential Threat,” and “Sustained Threat”) and the exponential growth of biomedical literature. This study compares two modeling strategies, domain-adapted fine-tuning and in-context prompting, across two RDoC-related subtasks from the official BioNLP-OST 2019 RDoC shared task. For Task 1, unlabeled PubMed abstracts are retrieved and ranked by relevance to eight of the RDoC constructs. We compare a TF-IDF baseline against ModernBERT and Llama (zero-shot and five-shot) using Mean Average Precision (MAP). For Task 2, the objective is to identify the single most relevant sentence from an abstract for a given construct, evaluated using per-construct accuracy. The fine-tuning track performs end-to-end fine-tuning of BioBERT, PubMedBERT, ModernBERT, and RoBERTa using a cross-encoder input format and per-construct grid search. These are compared against the in-context learning of several open-source language models. Both our approaches are competitive against the best-performing team’s score from the BioNLP-OST 2019 RDoC shared task. Taken together, these findings suggest that five-shot prompted LLMs and domain-adapted fine-tuned transformers are viable tools for semi-automating the expert annotation in RDoC curation.
%U https://aclanthology.org/2026.bionlp-1.51/
%P 644-655
Markdown (Informal)
[A Comparative Analysis of In-Context Learning and Fine-Tuning for Biomedical Information Retrieval and Sentence Extraction Using Research Domain Criteria](https://aclanthology.org/2026.bionlp-1.51/) (Jones et al., BioNLP 2026)
ACL