% ACL Anthology record for the FactDebug system paper (SemEval-2025 Task 7).
% Mixed-case names in the title are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitalisation.
@inproceedings{nikolaev-etal-2025-factdebug,
    title     = {{FactDebug} at {SemEval}-2025 Task 7: Hybrid Retrieval Pipeline for Identifying Previously Fact-Checked Claims Across Multiple Languages},
    author    = {Nikolaev, Evgenii and
                 Bondarenko, Ivan and
                 Aushev, Islam and
                 Krikunov, Vasilii and
                 Glinskii, Andrei and
                 Konovalov, Vasily and
                 Belikova, Julia},
    editor    = {Rosenthal, Sara and
                 Ros{\'a}, Aiala and
                 Ghosh, Debanjan and
                 Zampieri, Marcos},
    booktitle = {Proceedings of the 19th International Workshop on Semantic Evaluation ({SemEval-2025})},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.semeval-1.284/},
    pages     = {2190--2196},
    isbn      = {979-8-89176-273-2},
    abstract  = {The proliferation of multilingual misinformation demands robust systems for crosslingual fact-checked claim retrieval. This paper addresses SemEval-2025 Shared Task 7, which challenges participants to retrieve fact-checks for social media posts across 14 languages, even when posts and fact-checks are in different languages. We propose a hybrid retrieval pipeline that combines sparse lexical matching (BM25, BGE-m3) and dense semantic retrieval (pretrained and fine-tuned BGE-m3) with dynamic fusion and curriculum-trained rerankers. Our system achieves 67.2{\%} crosslingual and 86.01{\%} monolingual accuracy on the Shared Task MultiClaim dataset.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for citekey nikolaev-etal-2025-factdebug: paper authors first, then the host proceedings with its editors. -->
  <mods ID="nikolaev-etal-2025-factdebug">
    <titleInfo>
      <title>FactDebug at SemEval-2025 Task 7: Hybrid Retrieval Pipeline for Identifying Previously Fact-Checked Claims Across Multiple Languages</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Evgenii</namePart>
      <namePart type="family">Nikolaev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ivan</namePart>
      <namePart type="family">Bondarenko</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Islam</namePart>
      <namePart type="family">Aushev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vasilii</namePart>
      <namePart type="family">Krikunov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andrei</namePart>
      <namePart type="family">Glinskii</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vasily</namePart>
      <namePart type="family">Konovalov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Julia</namePart>
      <namePart type="family">Belikova</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sara</namePart>
        <namePart type="family">Rosenthal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aiala</namePart>
        <namePart type="family">Rosá</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Debanjan</namePart>
        <namePart type="family">Ghosh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcos</namePart>
        <namePart type="family">Zampieri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-273-2</identifier>
    </relatedItem>
    <abstract>The proliferation of multilingual misinformation demands robust systems for crosslingual fact-checked claim retrieval. This paper addresses SemEval-2025 Shared Task 7, which challenges participants to retrieve fact-checks for social media posts across 14 languages, even when posts and fact-checks are in different languages. We propose a hybrid retrieval pipeline that combines sparse lexical matching (BM25, BGE-m3) and dense semantic retrieval (pretrained and fine-tuned BGE-m3) with dynamic fusion and curriculum-trained rerankers. Our system achieves 67.2% crosslingual and 86.01% monolingual accuracy on the Shared Task MultiClaim dataset.</abstract>
    <identifier type="citekey">nikolaev-etal-2025-factdebug</identifier>
    <location>
      <url>https://aclanthology.org/2025.semeval-1.284/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>2190</start>
        <end>2196</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T FactDebug at SemEval-2025 Task 7: Hybrid Retrieval Pipeline for Identifying Previously Fact-Checked Claims Across Multiple Languages
%A Nikolaev, Evgenii
%A Bondarenko, Ivan
%A Aushev, Islam
%A Krikunov, Vasilii
%A Glinskii, Andrei
%A Konovalov, Vasily
%A Belikova, Julia
%Y Rosenthal, Sara
%Y Rosá, Aiala
%Y Ghosh, Debanjan
%Y Zampieri, Marcos
%S Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-273-2
%F nikolaev-etal-2025-factdebug
%X The proliferation of multilingual misinformation demands robust systems for crosslingual fact-checked claim retrieval. This paper addresses SemEval-2025 Shared Task 7, which challenges participants to retrieve fact-checks for social media posts across 14 languages, even when posts and fact-checks are in different languages. We propose a hybrid retrieval pipeline that combines sparse lexical matching (BM25, BGE-m3) and dense semantic retrieval (pretrained and fine-tuned BGE-m3) with dynamic fusion and curriculum-trained rerankers. Our system achieves 67.2% crosslingual and 86.01% monolingual accuracy on the Shared Task MultiClaim dataset.
%U https://aclanthology.org/2025.semeval-1.284/
%P 2190-2196
Markdown (Informal)
[FactDebug at SemEval-2025 Task 7: Hybrid Retrieval Pipeline for Identifying Previously Fact-Checked Claims Across Multiple Languages](https://aclanthology.org/2025.semeval-1.284/) (Nikolaev et al., SemEval 2025)
ACL