@inproceedings{gurung-shakya-2025-team,
title = "Team {M}eme{M}asters@{CASE} 2025: Adapting Vision-Language Models for Understanding Hate Speech in Multimodal Content",
author = "Gurung, Shruti and
Shakya, Shubham",
editor = {H{\"u}rriyeto{\u{g}}lu, Ali and
Tanev, Hristo and
Thapa, Surendrabikram},
booktitle = "Proceedings of the 8th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Texts",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.case-1.18/",
pages = "146--151",
abstract = "Social media memes have become a powerful form of digital communication, combining images and text to convey humor, social commentary, and sometimes harmful content. This paper presents a multimodal approach using a fine-tuned CLIP model to analyze text-embedded images in the CASE 2025 Shared Task. We address four subtasks: Hate Speech Detection, Target Classification, Stance Detection, and Humor Detection. Our method effectively captures visual and textual signals, achieving strong performance with precision of 80{\%} for the detection of hate speech and 76{\%} for the detection of humor, while stance and target classification achieved a precision of 60{\%} and 54{\%}, respectively. Detailed evaluations with classification reports and confusion matrices highlight the ability of the model to handle complex multimodal signals in social media content, demonstrating the potential of vision-language models for computational social science applications."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gurung-shakya-2025-team">
<titleInfo>
<title>Team MemeMasters@CASE 2025: Adapting Vision-Language Models for Understanding Hate Speech in Multimodal Content</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Gurung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shubham</namePart>
<namePart type="family">Shakya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Hürriyetoğlu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hristo</namePart>
<namePart type="family">Tanev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surendrabikram</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Social media memes have become a powerful form of digital communication, combining images and text to convey humor, social commentary, and sometimes harmful content. This paper presents a multimodal approach using a fine-tuned CLIP model to analyze text-embedded images in the CASE 2025 Shared Task. We address four subtasks: Hate Speech Detection, Target Classification, Stance Detection, and Humor Detection. Our method effectively captures visual and textual signals, achieving strong performance with precision of 80% for the detection of hate speech and 76% for the detection of humor, while stance and target classification achieved a precision of 60% and 54%, respectively. Detailed evaluations with classification reports and confusion matrices highlight the ability of the model to handle complex multimodal signals in social media content, demonstrating the potential of vision-language models for computational social science applications.</abstract>
<identifier type="citekey">gurung-shakya-2025-team</identifier>
<location>
<url>https://aclanthology.org/2025.case-1.18/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>146</start>
<end>151</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Team MemeMasters@CASE 2025: Adapting Vision-Language Models for Understanding Hate Speech in Multimodal Content
%A Gurung, Shruti
%A Shakya, Shubham
%Y Hürriyetoğlu, Ali
%Y Tanev, Hristo
%Y Thapa, Surendrabikram
%S Proceedings of the 8th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Texts
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F gurung-shakya-2025-team
%X Social media memes have become a powerful form of digital communication, combining images and text to convey humor, social commentary, and sometimes harmful content. This paper presents a multimodal approach using a fine-tuned CLIP model to analyze text-embedded images in the CASE 2025 Shared Task. We address four subtasks: Hate Speech Detection, Target Classification, Stance Detection, and Humor Detection. Our method effectively captures visual and textual signals, achieving strong performance with precision of 80% for the detection of hate speech and 76% for the detection of humor, while stance and target classification achieved a precision of 60% and 54%, respectively. Detailed evaluations with classification reports and confusion matrices highlight the ability of the model to handle complex multimodal signals in social media content, demonstrating the potential of vision-language models for computational social science applications.
%U https://aclanthology.org/2025.case-1.18/
%P 146-151
Markdown (Informal)
[Team MemeMasters@CASE 2025: Adapting Vision-Language Models for Understanding Hate Speech in Multimodal Content](https://aclanthology.org/2025.case-1.18/) (Gurung & Shakya, CASE 2025)
ACL