BibTeX

@inproceedings{rane-2025-mmfusion,
    title = "{MMF}usion@{CASE} 2025: Attention-Based Multimodal Learning for Text-Image Content Analysis",
    author = "Rane, Prerana",
    editor = {H{\"u}rriyeto{\u{g}}lu, Ali and
      Tanev, Hristo and
      Thapa, Surendrabikram},
    booktitle = "Proceedings of the 8th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Texts",
    month = sep,
    year = "2025",
    address = "Varna, Bulgaria",
    publisher = "INCOMA Ltd., Shoumen, Bulgaria",
    url = "https://aclanthology.org/2025.case-1.14/",
    pages = "115--122",
    abstract = "Text-embedded images, such as memes, are now increasingly common in social media discourse. These images combine visual and textual elements to convey complex attitudes and emotions. Deciphering the intent of these images is challenging due to their multimodal and context-dependent nature. This paper presents our approach to the Shared Task on Multimodal Hate, Humor, and Stance Detection in Marginalized Movement at CASE 2025. The shared task focuses on four key aspects of multimodal content analysis for text-embedded images: hate speech detection, target identification, stance classification, and humor recognition. We propose a multimodal learning framework that uses both textual and visual representations, along with cross-modal attention mechanisms, to classify content across all tasks effectively."
}

MODS XML

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="rane-2025-mmfusion">
    <titleInfo>
      <title>MMFusion@CASE 2025: Attention-Based Multimodal Learning for Text-Image Content Analysis</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Prerana</namePart>
      <namePart type="family">Rane</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 8th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Texts</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ali</namePart>
        <namePart type="family">Hürriyetoğlu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hristo</namePart>
        <namePart type="family">Tanev</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Surendrabikram</namePart>
        <namePart type="family">Thapa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Text-embedded images, such as memes, are now increasingly common in social media discourse. These images combine visual and textual elements to convey complex attitudes and emotions. Deciphering the intent of these images is challenging due to their multimodal and context-dependent nature. This paper presents our approach to the Shared Task on Multimodal Hate, Humor, and Stance Detection in Marginalized Movement at CASE 2025. The shared task focuses on four key aspects of multimodal content analysis for text-embedded images: hate speech detection, target identification, stance classification, and humor recognition. We propose a multimodal learning framework that uses both textual and visual representations, along with cross-modal attention mechanisms, to classify content across all tasks effectively.</abstract>
    <identifier type="citekey">rane-2025-mmfusion</identifier>
    <location>
      <url>https://aclanthology.org/2025.case-1.14/</url>
    </location>
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>115</start>
        <end>122</end>
      </extent>
    </part>
  </mods>
</modsCollection>

Endnote

%0 Conference Proceedings
%T MMFusion@CASE 2025: Attention-Based Multimodal Learning for Text-Image Content Analysis
%A Rane, Prerana
%Y Hürriyetoğlu, Ali
%Y Tanev, Hristo
%Y Thapa, Surendrabikram
%S Proceedings of the 8th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Texts
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F rane-2025-mmfusion
%X Text-embedded images, such as memes, are now increasingly common in social media discourse. These images combine visual and textual elements to convey complex attitudes and emotions. Deciphering the intent of these images is challenging due to their multimodal and context-dependent nature. This paper presents our approach to the Shared Task on Multimodal Hate, Humor, and Stance Detection in Marginalized Movement at CASE 2025. The shared task focuses on four key aspects of multimodal content analysis for text-embedded images: hate speech detection, target identification, stance classification, and humor recognition. We propose a multimodal learning framework that uses both textual and visual representations, along with cross-modal attention mechanisms, to classify content across all tasks effectively.
%U https://aclanthology.org/2025.case-1.14/
%P 115-122

Markdown (Informal)
[MMFusion@CASE 2025: Attention-Based Multimodal Learning for Text-Image Content Analysis](https://aclanthology.org/2025.case-1.14/) (Rane, CASE 2025)
ACL

Prerana Rane. 2025. [MMFusion@CASE 2025: Attention-Based Multimodal Learning for Text-Image Content Analysis](https://aclanthology.org/2025.case-1.14/). In *Proceedings of the 8th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Texts*, pages 115–122, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.
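
The abstract describes fusing textual and visual representations with cross-modal attention before classification. As a rough illustration of that general technique only (the paper's actual architecture, encoders, and dimensions are not specified here; all names and sizes below are assumptions), a minimal PyTorch sketch might let text-token features attend over image-patch features and classify the pooled result:

```python
# Hypothetical sketch of cross-modal attention fusion; NOT the paper's model.
# Assumes pre-extracted text-token and image-patch features of matching width.
import torch
import torch.nn as nn


class CrossModalFusion(nn.Module):
    def __init__(self, dim: int = 768, heads: int = 8, num_classes: int = 2):
        super().__init__()
        # Queries come from the text tokens; keys/values from image patches.
        self.cross_attn = nn.MultiheadAttention(dim, heads, batch_first=True)
        self.classifier = nn.Linear(dim, num_classes)

    def forward(self, text_feats: torch.Tensor, image_feats: torch.Tensor) -> torch.Tensor:
        # text_feats: (batch, n_tokens, dim); image_feats: (batch, n_patches, dim)
        fused, _ = self.cross_attn(text_feats, image_feats, image_feats)
        # Mean-pool the attended sequence, then score task labels
        # (e.g. hate vs. non-hate for the binary subtask).
        return self.classifier(fused.mean(dim=1))


# Usage with random tensors standing in for encoder outputs:
model = CrossModalFusion()
logits = model(torch.randn(4, 32, 768), torch.randn(4, 49, 768))
print(logits.shape)  # torch.Size([4, 2])
```

In a setup like this, one such head per subtask (hate, target, stance, humor) over shared encoders is a common design; whether the paper shares parameters across the four tasks is not stated in this metadata.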