@inproceedings{yadav-etal-2025-hope,
title = "Hope{\_}for{\_}best@{LT}-{EDI} 2025: Detecting Racial Hoaxes in Code-Mixed {H}indi-{E}nglish Social Media Data using a multi-phase fine-tuning strategy",
author = "Yadav, Abhishek Singh and
Sharma, Deepawali and
Singh, Aakash and
Singh, Vivek Kumar",
editor = "Gkirtzou, Katerina and
{\v{Z}}itnik, Slavko and
Gracia, Jorge and
Gromann, Dagmar and
di Buono, Maria Pia and
Monti, Johanna and
Ionov, Maxim",
booktitle = "Proceedings of the 5th Conference on Language, Data and Knowledge: Fifth Workshop on Language Technology for Equality, Diversity, Inclusion",
month = sep,
year = "2025",
address = "Naples, Italy",
publisher = "Unior Press",
url = "https://aclanthology.org/2025.ltedi-1.7/",
pages = "39--46",
ISBN = "978-88-6719-334-9",
abstract = "In the age of digital communication, social media platforms have become a medium for the spread of misinformation, with racial hoaxes posing a particularly insidious threat. These hoaxes falsely associate individuals or communities with crimes or misconduct, perpetuating harmful stereotypes and inflaming societal tensions. This paper describes the team ``Hope{\_}for{\_}best'' submission that addresses the challenge of detecting racial hoaxes in codemixed Hindi-English (Hinglish) social media content and secured the 2nd rank in the shared task (Chakravarthi et al., 2025). To address this challenge, the study employs the HoaxMix Plus dataset, developed by LT-EDI 2025, and adopts a multi-phase fine-tuning strategy. Initially, models are sensitized using the THAR dataset{---}targeted hate speech against religion (Sharma et al., 2024) {---}to adjust weights toward contextually relevant biases. Further fine-tuning was performed on the HoaxMix Plus dataset. This work employed data balancing sampling strategies to mitigate class imbalance. Among the evaluated models, Hing BERT achieved the highest macro F1-score of 73{\%} demonstrating promising capabilities in detecting racially charged misinformation in code-mixed Hindi-English texts."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yadav-etal-2025-hope">
<titleInfo>
<title>Hope_for_best@LT-EDI 2025: Detecting Racial Hoaxes in Code-Mixed Hindi-English Social Media Data using a multi-phase fine-tuning strategy</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abhishek</namePart>
<namePart type="given">Singh</namePart>
<namePart type="family">Yadav</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deepawali</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aakash</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Conference on Language, Data and Knowledge: Fifth Workshop on Language Technology for Equality, Diversity, Inclusion</title>
</titleInfo>
<name type="personal">
<namePart type="given">Katerina</namePart>
<namePart type="family">Gkirtzou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Slavko</namePart>
<namePart type="family">Žitnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jorge</namePart>
<namePart type="family">Gracia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dagmar</namePart>
<namePart type="family">Gromann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Pia</namePart>
<namePart type="family">di Buono</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johanna</namePart>
<namePart type="family">Monti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maxim</namePart>
<namePart type="family">Ionov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Unior Press</publisher>
<place>
<placeTerm type="text">Naples, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-88-6719-334-9</identifier>
</relatedItem>
<abstract>In the age of digital communication, social media platforms have become a medium for the spread of misinformation, with racial hoaxes posing a particularly insidious threat. These hoaxes falsely associate individuals or communities with crimes or misconduct, perpetuating harmful stereotypes and inflaming societal tensions. This paper describes the team “Hope_for_best” submission that addresses the challenge of detecting racial hoaxes in codemixed Hindi-English (Hinglish) social media content and secured the 2nd rank in the shared task (Chakravarthi et al., 2025). To address this challenge, the study employs the HoaxMix Plus dataset, developed by LT-EDI 2025, and adopts a multi-phase fine-tuning strategy. Initially, models are sensitized using the THAR dataset—targeted hate speech against religion (Sharma et al., 2024) —to adjust weights toward contextually relevant biases. Further fine-tuning was performed on the HoaxMix Plus dataset. This work employed data balancing sampling strategies to mitigate class imbalance. Among the evaluated models, Hing BERT achieved the highest macro F1-score of 73% demonstrating promising capabilities in detecting racially charged misinformation in code-mixed Hindi-English texts.</abstract>
<identifier type="citekey">yadav-etal-2025-hope</identifier>
<location>
<url>https://aclanthology.org/2025.ltedi-1.7/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>39</start>
<end>46</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Hope_for_best@LT-EDI 2025: Detecting Racial Hoaxes in Code-Mixed Hindi-English Social Media Data using a multi-phase fine-tuning strategy
%A Yadav, Abhishek Singh
%A Sharma, Deepawali
%A Singh, Aakash
%A Singh, Vivek Kumar
%Y Gkirtzou, Katerina
%Y Žitnik, Slavko
%Y Gracia, Jorge
%Y Gromann, Dagmar
%Y di Buono, Maria Pia
%Y Monti, Johanna
%Y Ionov, Maxim
%S Proceedings of the 5th Conference on Language, Data and Knowledge: Fifth Workshop on Language Technology for Equality, Diversity, Inclusion
%D 2025
%8 September
%I Unior Press
%C Naples, Italy
%@ 978-88-6719-334-9
%F yadav-etal-2025-hope
%X In the age of digital communication, social media platforms have become a medium for the spread of misinformation, with racial hoaxes posing a particularly insidious threat. These hoaxes falsely associate individuals or communities with crimes or misconduct, perpetuating harmful stereotypes and inflaming societal tensions. This paper describes the team “Hope_for_best” submission that addresses the challenge of detecting racial hoaxes in codemixed Hindi-English (Hinglish) social media content and secured the 2nd rank in the shared task (Chakravarthi et al., 2025). To address this challenge, the study employs the HoaxMix Plus dataset, developed by LT-EDI 2025, and adopts a multi-phase fine-tuning strategy. Initially, models are sensitized using the THAR dataset—targeted hate speech against religion (Sharma et al., 2024) —to adjust weights toward contextually relevant biases. Further fine-tuning was performed on the HoaxMix Plus dataset. This work employed data balancing sampling strategies to mitigate class imbalance. Among the evaluated models, Hing BERT achieved the highest macro F1-score of 73% demonstrating promising capabilities in detecting racially charged misinformation in code-mixed Hindi-English texts.
%U https://aclanthology.org/2025.ltedi-1.7/
%P 39-46
Markdown (Informal)
[Hope_for_best@LT-EDI 2025: Detecting Racial Hoaxes in Code-Mixed Hindi-English Social Media Data using a multi-phase fine-tuning strategy](https://aclanthology.org/2025.ltedi-1.7/) (Yadav et al., LTEDI 2025)
ACL