@inproceedings{enache-2025-unibuc,
title = "{U}ni{B}uc-{AE} at {S}em{E}val-2025 Task 7: Training Text Embedding Models for Multilingual and Crosslingual Fact-Checked Claim Retrieval",
author = "Enache, Alexandru",
editor = "Rosenthal, Sara and
Ros{\'a}, Aiala and
Ghosh, Debanjan and
Zampieri, Marcos",
booktitle = "Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.semeval-1.89/",
pages = "634--639",
ISBN = "979-8-89176-273-2",
abstract = "This paper describes our approach to the SemEval-2025 Task 7: Multilingual and Crosslingual Fact-Checked Claim Retrieval on both the monolingual and crosslingual tracks. Our training methodology for text embedding models combines contrastive pre-training and hard negatives mining in order to fine-tune models from the E5 family. Additionally, we introduce a novel approach for merging the results from multiple models by finding the best majority vote weighted configuration for each subtask using the validation dataset. Our team ranked 6th in the monolingual track scoring a 0.934 S@10 averaged over all languages and achieved a 0.79 S@10 on the crosslingual task, ranking 8th in this track."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="enache-2025-unibuc">
<titleInfo>
<title>UniBuc-AE at SemEval-2025 Task 7: Training Text Embedding Models for Multilingual and Crosslingual Fact-Checked Claim Retrieval</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexandru</namePart>
<namePart type="family">Enache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-273-2</identifier>
</relatedItem>
<abstract>This paper describes our approach to the SemEval-2025 Task 7: Multilingual and Crosslingual Fact-Checked Claim Retrieval on both the monolingual and crosslingual tracks. Our training methodology for text embedding models combines contrastive pre-training and hard negatives mining in order to fine-tune models from the E5 family. Additionally, we introduce a novel approach for merging the results from multiple models by finding the best majority vote weighted configuration for each subtask using the validation dataset. Our team ranked 6th in the monolingual track scoring a 0.934 S@10 averaged over all languages and achieved a 0.79 S@10 on the crosslingual task, ranking 8th in this track.</abstract>
<identifier type="citekey">enache-2025-unibuc</identifier>
<location>
<url>https://aclanthology.org/2025.semeval-1.89/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>634</start>
<end>639</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T UniBuc-AE at SemEval-2025 Task 7: Training Text Embedding Models for Multilingual and Crosslingual Fact-Checked Claim Retrieval
%A Enache, Alexandru
%Y Rosenthal, Sara
%Y Rosá, Aiala
%Y Ghosh, Debanjan
%Y Zampieri, Marcos
%S Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-273-2
%F enache-2025-unibuc
%X This paper describes our approach to the SemEval-2025 Task 7: Multilingual and Crosslingual Fact-Checked Claim Retrieval on both the monolingual and crosslingual tracks. Our training methodology for text embedding models combines contrastive pre-training and hard negatives mining in order to fine-tune models from the E5 family. Additionally, we introduce a novel approach for merging the results from multiple models by finding the best majority vote weighted configuration for each subtask using the validation dataset. Our team ranked 6th in the monolingual track scoring a 0.934 S@10 averaged over all languages and achieved a 0.79 S@10 on the crosslingual task, ranking 8th in this track.
%U https://aclanthology.org/2025.semeval-1.89/
%P 634-639
Markdown (Informal)
[UniBuc-AE at SemEval-2025 Task 7: Training Text Embedding Models for Multilingual and Crosslingual Fact-Checked Claim Retrieval](https://aclanthology.org/2025.semeval-1.89/) (Enache, SemEval 2025)
ACL