% ACL Anthology record for the Pride-Boiler system paper in the
% BioNLP 2026 (Shared Tasks) proceedings.
% Case-sensitive names in title/booktitle are brace-protected as whole words
% so that sentence-casing bibliography styles keep their capitalisation.
% The abstract is reproduced verbatim from the published record.
@inproceedings{ebinesar-etal-2026-pride,
  title     = {{Pride-Boiler} at {MedGenVidQA} 2026: {LLM}-Augmented {BM25} Retrieval with Corrective Self-Verification for Biomedical Evidence Retrieval},
  author    = {Ebinesar, Basil and
               Jiang, Keyuan and
               Maddineni, Charansai and
               Raja, Ashok},
  editor    = {Gupta, Deepak and
               Demner-Fushman, Dina},
  booktitle = {Proceedings of the {BioNLP} 2026 (Shared Tasks)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.bionlp-2.33/},
  pages     = {248--256},
  isbn      = {979-8-89176-435-4},
  abstract  = {This paper describes the Pride-Boiler system submitted to MedGenVidQA 2026 Shared Task A, which asks for retrieving relevant PubMed articles and medical instructional videos in response to consumer health queries. Our approach pairs Pyserini BM25 retrieval with LLM-driven query rewriting and a corrective self-verification loop inspired by the Corrective Retrieval-Augmented Generation (CRAG) paradigm. Given a consumer query, the pipeline first asks Google Gemini to generate clinically optimized search text, one targeting PubMed abstracts with MeSH terms and clinical synonyms, and another targeting video subtitles with procedural action language. BM25 retrieves a broad candidate pool, and Gemini then scores each candidate against the original query, blending its relevance judgment with the normalized lexical signal. A quality grader assesses the top results: if they are judged insufficient, the pipeline triggers a corrective cycle with reformulated terminology and retries up to three attempts. The entire workflow is orchestrated as a LangGraph state machine. In the official shared task evaluation, Pride-Boiler ranked first among all participating systems on PubMed article retrieval, achieving an nDCG of 0.6532 and MAP of 0.5550, both exceeding the organizer-provided Text-RR baseline. Our performance on video (text) retrieval achieves 0.5304 in MAP and 0.5927 in nDCG, outperforming other systems but falling below that of baseline, indicating the structural limitations of lexical matching over noisy subtitle text. We release the pipeline code to support reproducibility on GitHub at https://github.com/basilll007/BioNLP.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper with citekey ebinesar-etal-2026-pride -->
  <mods ID="ebinesar-etal-2026-pride">
    <titleInfo>
      <title>Pride-Boiler at MedGenVidQA 2026: LLM-Augmented BM25 Retrieval with Corrective Self-Verification for Biomedical Evidence Retrieval</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Basil</namePart>
      <namePart type="family">Ebinesar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Keyuan</namePart>
      <namePart type="family">Jiang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Charansai</namePart>
      <namePart type="family">Maddineni</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ashok</namePart>
      <namePart type="family">Raja</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the BioNLP 2026 (Shared Tasks)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Deepak</namePart>
        <namePart type="family">Gupta</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dina</namePart>
        <namePart type="family">Demner-Fushman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-435-4</identifier>
    </relatedItem>
    <abstract>This paper describes the Pride-Boiler system submitted to MedGenVidQA 2026 Shared Task A, which asks for retrieving relevant PubMed articles and medical instructional videos in response to consumer health queries. Our approach pairs Pyserini BM25 retrieval with LLM-driven query rewriting and a corrective self-verification loop inspired by the Corrective Retrieval-Augmented Generation (CRAG) paradigm. Given a consumer query, the pipeline first asks Google Gemini to generate clinically optimized search text, one targeting PubMed abstracts with MeSH terms and clinical synonyms, and another targeting video subtitles with procedural action language. BM25 retrieves a broad candidate pool, and Gemini then scores each candidate against the original query, blending its relevance judgment with the normalized lexical signal. A quality grader assesses the top results: if they are judged insufficient, the pipeline triggers a corrective cycle with reformulated terminology and retries up to three attempts. The entire workflow is orchestrated as a LangGraph state machine. In the official shared task evaluation, Pride-Boiler ranked first among all participating systems on PubMed article retrieval, achieving an nDCG of 0.6532 and MAP of 0.5550, both exceeding the organizer-provided Text-RR baseline. Our performance on video (text) retrieval achieves 0.5304 in MAP and 0.5927 in nDCG, outperforming other systems but falling below that of baseline, indicating the structural limitations of lexical matching over noisy subtitle text. We release the pipeline code to support reproducibility on GitHub at https://github.com/basilll007/BioNLP.</abstract>
    <identifier type="citekey">ebinesar-etal-2026-pride</identifier>
    <location>
      <url>https://aclanthology.org/2026.bionlp-2.33/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>248</start>
        <end>256</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Pride-Boiler at MedGenVidQA 2026: LLM-Augmented BM25 Retrieval with Corrective Self-Verification for Biomedical Evidence Retrieval
%A Ebinesar, Basil
%A Jiang, Keyuan
%A Maddineni, Charansai
%A Raja, Ashok
%Y Gupta, Deepak
%Y Demner-Fushman, Dina
%S Proceedings of the BioNLP 2026 (Shared Tasks)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-435-4
%F ebinesar-etal-2026-pride
%X This paper describes the Pride-Boiler system submitted to MedGenVidQA 2026 Shared Task A, which asks for retrieving relevant PubMed articles and medical instructional videos in response to consumer health queries. Our approach pairs Pyserini BM25 retrieval with LLM-driven query rewriting and a corrective self-verification loop inspired by the Corrective Retrieval-Augmented Generation (CRAG) paradigm. Given a consumer query, the pipeline first asks Google Gemini to generate clinically optimized search text, one targeting PubMed abstracts with MeSH terms and clinical synonyms, and another targeting video subtitles with procedural action language. BM25 retrieves a broad candidate pool, and Gemini then scores each candidate against the original query, blending its relevance judgment with the normalized lexical signal. A quality grader assesses the top results: if they are judged insufficient, the pipeline triggers a corrective cycle with reformulated terminology and retries up to three attempts. The entire workflow is orchestrated as a LangGraph state machine. In the official shared task evaluation, Pride-Boiler ranked first among all participating systems on PubMed article retrieval, achieving an nDCG of 0.6532 and MAP of 0.5550, both exceeding the organizer-provided Text-RR baseline. Our performance on video (text) retrieval achieves 0.5304 in MAP and 0.5927 in nDCG, outperforming other systems but falling below that of baseline, indicating the structural limitations of lexical matching over noisy subtitle text. We release the pipeline code to support reproducibility on GitHub at https://github.com/basilll007/BioNLP.
%U https://aclanthology.org/2026.bionlp-2.33/
%P 248-256
Markdown (Informal)
[Pride-Boiler at MedGenVidQA 2026: LLM-Augmented BM25 Retrieval with Corrective Self-Verification for Biomedical Evidence Retrieval](https://aclanthology.org/2026.bionlp-2.33/) (Ebinesar et al., BioNLP 2026)
ACL