@inproceedings{pan-etal-2025-umuteam-semeval-2025,
title = "{UMUT}eam at {S}em{E}val-2025 Task 7: Multilingual Fact-Checked Claim Retrieval with {XLM}-{R}o{BERT}a and Self-Alignment Pretraining Strategy",
author = "Pan, Ronghao and
Bernal-Beltr{\'a}n, Tom{\'a}s and
Garc{\'i}a-D{\'i}az, Jos{\'e} Antonio and
Valencia-Garc{\'i}a, Rafael",
editor = "Rosenthal, Sara and
Ros{\'a}, Aiala and
Ghosh, Debanjan and
Zampieri, Marcos",
booktitle = "Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.semeval-1.103/",
pages = "757--762",
ISBN = "979-8-89176-273-2",
abstract = "In today{'}s digital age, the rapid dissemination of information through social networks poses significant challenges in verifying the veracity of shared content. The proliferation of misinformation can have serious consequences, influencing public opinion, policy decisions, and social dynamics. Fact-checking plays a critical role in countering misinformation; however, the manual verification process is time-consuming, especially when dealing with multilingual content. This paper presents our participation in the Multilingual and Crosslingual Fact-Checked Claim Retrieval task (SemEval 2025), which seeks to identify previously fact-checked claims relevant to social media posts. Our proposed system leverages XLM-RoBERTa, a multilingual Transformer model, combined with metric learning and hard negative mining strategies, to optimize the semantic comparison of posts and fact-checks across multiple languages. By fine-tuning a shared embedding space and employing a multiple similarity loss function, our approach enhances retrieval accuracy while maintaining efficiency. Evaluation results demonstrate competitive performance across multiple languages, reaching 25th place and highlighting the potential of multilingual NLP models in automating the fact-checking process and mitigating misinformation spread."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pan-etal-2025-umuteam-semeval-2025">
<titleInfo>
<title>UMUTeam at SemEval-2025 Task 7: Multilingual Fact-Checked Claim Retrieval with XLM-RoBERTa and Self-Alignment Pretraining Strategy</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ronghao</namePart>
<namePart type="family">Pan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomás</namePart>
<namePart type="family">Bernal - Beltrán</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">José</namePart>
<namePart type="given">Antonio</namePart>
<namePart type="family">García - Díaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rafael</namePart>
<namePart type="family">Valencia - García</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-273-2</identifier>
</relatedItem>
<abstract>In today’s digital age, the rapid dissemination of information through social networks poses significant challenges in verifying the veracity of shared content. The proliferation of misinformation can have serious consequences, influencing public opinion, policy decisions, and social dynamics. Fact-checking plays a critical role in countering misinformation; however, the manual verification process is time-consuming, especially when dealing with multilingual content. This paper presents our participation in the Multilingual and Crosslingual Fact-Checked Claim Retrieval task (SemEval 2025), which seeks to identify previously fact-checked claims relevant to social media posts. Our proposed system leverages XLM-RoBERTa, a multilingual Transformer model, combined with metric learning and hard negative mining strategies, to optimize the semantic comparison of posts and fact-checks across multiple languages. By fine-tuning a shared embedding space and employing a multiple similarity loss function, our approach enhances retrieval accuracy while maintaining efficiency. Evaluation results demonstrate competitive performance across multiple languages, reaching 25th place and highlighting the potential of multilingual NLP models in automating the fact-checking process and mitigating misinformation spread.</abstract>
<identifier type="citekey">pan-etal-2025-umuteam-semeval-2025</identifier>
<location>
<url>https://aclanthology.org/2025.semeval-1.103/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>757</start>
<end>762</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T UMUTeam at SemEval-2025 Task 7: Multilingual Fact-Checked Claim Retrieval with XLM-RoBERTa and Self-Alignment Pretraining Strategy
%A Pan, Ronghao
%A Bernal-Beltrán, Tomás
%A García-Díaz, José Antonio
%A Valencia-García, Rafael
%Y Rosenthal, Sara
%Y Rosá, Aiala
%Y Ghosh, Debanjan
%Y Zampieri, Marcos
%S Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-273-2
%F pan-etal-2025-umuteam-semeval-2025
%X In today’s digital age, the rapid dissemination of information through social networks poses significant challenges in verifying the veracity of shared content. The proliferation of misinformation can have serious consequences, influencing public opinion, policy decisions, and social dynamics. Fact-checking plays a critical role in countering misinformation; however, the manual verification process is time-consuming, especially when dealing with multilingual content. This paper presents our participation in the Multilingual and Crosslingual Fact-Checked Claim Retrieval task (SemEval 2025), which seeks to identify previously fact-checked claims relevant to social media posts. Our proposed system leverages XLM-RoBERTa, a multilingual Transformer model, combined with metric learning and hard negative mining strategies, to optimize the semantic comparison of posts and fact-checks across multiple languages. By fine-tuning a shared embedding space and employing a multiple similarity loss function, our approach enhances retrieval accuracy while maintaining efficiency. Evaluation results demonstrate competitive performance across multiple languages, reaching 25th place and highlighting the potential of multilingual NLP models in automating the fact-checking process and mitigating misinformation spread.
%U https://aclanthology.org/2025.semeval-1.103/
%P 757-762
Markdown (Informal)
[UMUTeam at SemEval-2025 Task 7: Multilingual Fact-Checked Claim Retrieval with XLM-RoBERTa and Self-Alignment Pretraining Strategy](https://aclanthology.org/2025.semeval-1.103/) (Pan et al., SemEval 2025)
ACL
- Ronghao Pan, Tomás Bernal-Beltrán, José Antonio García-Díaz, and Rafael Valencia-García. 2025. [UMUTeam at SemEval-2025 Task 7: Multilingual Fact-Checked Claim Retrieval with XLM-RoBERTa and Self-Alignment Pretraining Strategy](https://aclanthology.org/2025.semeval-1.103/). In *Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)*, pages 757–762, Vienna, Austria. Association for Computational Linguistics.
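
The abstract describes the system only at a high level: XLM-RoBERTa embeddings fine-tuned in a shared post/fact-check space with a multiple similarity loss and hard negative mining. The snippet below is a minimal, hypothetical sketch of that recipe using Hugging Face `transformers` together with the `pytorch-metric-learning` library's `MultiSimilarityLoss` and `MultiSimilarityMiner`. The base checkpoint, mean pooling, toy examples, and single-step training loop are all illustrative assumptions and are not taken from the authors' code.

```python
# Hypothetical sketch: fine-tuning XLM-RoBERTa embeddings with a
# multi-similarity loss and online hard negative mining, in the spirit
# of the system described in the abstract. Not the authors' released code.
import torch
from transformers import AutoModel, AutoTokenizer
from pytorch_metric_learning import losses, miners

tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
encoder = AutoModel.from_pretrained("xlm-roberta-base")

def embed(texts):
    """Mean-pool token embeddings into one vector per text."""
    batch = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    hidden = encoder(**batch).last_hidden_state      # (B, T, H)
    mask = batch["attention_mask"].unsqueeze(-1)     # (B, T, 1)
    return (hidden * mask).sum(1) / mask.sum(1)      # (B, H)

# A post and its matching fact-check share a label, so matching pairs
# act as positives and all other in-batch items as candidate negatives.
miner = miners.MultiSimilarityMiner()
loss_fn = losses.MultiSimilarityLoss()
optimizer = torch.optim.AdamW(encoder.parameters(), lr=2e-5)

posts = ["Vaccines contain microchips", "La tierra es plana"]  # toy data
fact_checks = [
    "Claim that vaccines contain microchips is false",
    "Fact-check: the Earth is not flat",
]
labels = torch.tensor([0, 1, 0, 1])  # post i matches fact-check i

embeddings = embed(posts + fact_checks)
hard_pairs = miner(embeddings, labels)          # online hard negative mining
loss = loss_fn(embeddings, labels, hard_pairs)  # multi-similarity loss
optimizer.zero_grad()
loss.backward()
optimizer.step()
```

At retrieval time, posts and previously fact-checked claims would be encoded with the fine-tuned model and ranked by cosine similarity in the shared embedding space, returning the top-k fact-checks per post.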