@inproceedings{wang-etal-2025-winning,
title = "Winning {C}limate{C}heck: A Multi-Stage System with {BM}25, {BGE}-Reranker Ensembles, and {LLM}-based Analysis for Scientific Abstract Retrieval",
author = "Wang, Junjun and
Chen, Kunlong and
Chen, Zhaoqun and
He, Peng and
Zheng, Wenlu",
editor = "Ghosal, Tirthankar and
Mayr, Philipp and
Singh, Amanpreet and
Naik, Aakanksha and
Rehm, Georg and
Freitag, Dayne and
Li, Dan and
Schimmler, Sonja and
De Waard, Anita",
booktitle = "Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.sdp-1.25/",
doi = "10.18653/v1/2025.sdp-1.25",
pages = "276--280",
ISBN = "979-8-89176-265-7",
abstract = "The ClimateCheck shared task addresses the critical challenge of grounding social media claims about climate change in scientific literature. This paper details our winning approach. For abstract retrieval, we propose a multi-stage pipeline: (1) initial candidate generation from a corpus of {\textasciitilde}400,000 abstracts using BM25; (2) fine-grained reranking of these candidates using an ensemble of BGE-Reranker cross-encoder models, fine-tuned with a specialized training set incorporating both random and hard negative samples; and (3) final list selection based on an RRF-ensembled score. For the verification aspect, we leverage Gemini 2.5 Pro to classify the relationship (Supports, Refutes, Not Enough Information) between claims and the retrieved abstracts, guided by carefully engineered prompts. Our system achieved first place in both subtasks, demonstrating the efficacy of combining robust sparse retrieval, powerful neural rerankers, strategic negative sampling, and LLM-based semantic analysis for connecting social media discourse to scientific evidence. Part of the example code: \url{https://anonymous.4open.science/r/climatecheck_solution-1120}"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2025-winning">
<titleInfo>
<title>Winning ClimateCheck: A Multi-Stage System with BM25, BGE-Reranker Ensembles, and LLM-based Analysis for Scientific Abstract Retrieval</title>
</titleInfo>
<name type="personal">
<namePart type="given">Junjun</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kunlong</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhaoqun</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peng</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenlu</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Mayr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanpreet</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aakanksha</namePart>
<namePart type="family">Naik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Rehm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dayne</namePart>
<namePart type="family">Freitag</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sonja</namePart>
<namePart type="family">Schimmler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">De Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-265-7</identifier>
</relatedItem>
<abstract>The ClimateCheck shared task addresses the critical challenge of grounding social media claims about climate change in scientific literature. This paper details our winning approach. For abstract retrieval, we propose a multi-stage pipeline: (1) initial candidate generation from a corpus of ~400,000 abstracts using BM25; (2) fine-grained reranking of these candidates using an ensemble of BGE-Reranker cross-encoder models, fine-tuned with a specialized training set incorporating both random and hard negative samples; and (3) final list selection based on an RRF-ensembled score. For the verification aspect, we leverage Gemini 2.5 Pro to classify the relationship (Supports, Refutes, Not Enough Information) between claims and the retrieved abstracts, guided by carefully engineered prompts. Our system achieved first place in both subtasks, demonstrating the efficacy of combining robust sparse retrieval, powerful neural rerankers, strategic negative sampling, and LLM-based semantic analysis for connecting social media discourse to scientific evidence. Part of the example code: https://anonymous.4open.science/r/climatecheck_solution-1120</abstract>
<identifier type="citekey">wang-etal-2025-winning</identifier>
<identifier type="doi">10.18653/v1/2025.sdp-1.25</identifier>
<location>
<url>https://aclanthology.org/2025.sdp-1.25/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>276</start>
<end>280</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Winning ClimateCheck: A Multi-Stage System with BM25, BGE-Reranker Ensembles, and LLM-based Analysis for Scientific Abstract Retrieval
%A Wang, Junjun
%A Chen, Kunlong
%A Chen, Zhaoqun
%A He, Peng
%A Zheng, Wenlu
%Y Ghosal, Tirthankar
%Y Mayr, Philipp
%Y Singh, Amanpreet
%Y Naik, Aakanksha
%Y Rehm, Georg
%Y Freitag, Dayne
%Y Li, Dan
%Y Schimmler, Sonja
%Y De Waard, Anita
%S Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-265-7
%F wang-etal-2025-winning
%X The ClimateCheck shared task addresses the critical challenge of grounding social media claims about climate change in scientific literature. This paper details our winning approach. For abstract retrieval, we propose a multi-stage pipeline: (1) initial candidate generation from a corpus of ~400,000 abstracts using BM25; (2) fine-grained reranking of these candidates using an ensemble of BGE-Reranker cross-encoder models, fine-tuned with a specialized training set incorporating both random and hard negative samples; and (3) final list selection based on an RRF-ensembled score. For the verification aspect, we leverage Gemini 2.5 Pro to classify the relationship (Supports, Refutes, Not Enough Information) between claims and the retrieved abstracts, guided by carefully engineered prompts. Our system achieved first place in both subtasks, demonstrating the efficacy of combining robust sparse retrieval, powerful neural rerankers, strategic negative sampling, and LLM-based semantic analysis for connecting social media discourse to scientific evidence. Part of the example code: https://anonymous.4open.science/r/climatecheck_solution-1120
%R 10.18653/v1/2025.sdp-1.25
%U https://aclanthology.org/2025.sdp-1.25/
%U https://doi.org/10.18653/v1/2025.sdp-1.25
%P 276-280
Markdown (Informal)
[Winning ClimateCheck: A Multi-Stage System with BM25, BGE-Reranker Ensembles, and LLM-based Analysis for Scientific Abstract Retrieval](https://aclanthology.org/2025.sdp-1.25/) (Wang et al., SDP 2025)
ACL