@inproceedings{pan-etal-2024-umuteam,
title = "{UMUT}eam at {S}em{E}val-2024 Task 4: Multimodal Identification of Persuasive Techniques in Memes through Large Language Models",
author = "Pan, Ronghao and
Garc{\'\i}a-d{\'\i}az, Jos{\'e} Antonio and
Valencia-garc{\'\i}a, Rafael",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Tayyar Madabushi, Harish and
Da San Martino, Giovanni and
Rosenthal, Sara and
Ros{\'a}, Aiala},
booktitle = "Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.semeval-1.96",
doi = "10.18653/v1/2024.semeval-1.96",
pages = "655--666",
abstract = "In this manuscript we describe the UMUTeam{'}s participation in SemEval-2024 Task 4, a shared task to identify different persuasion techniques in memes. The task is divided into three subtasks. One is a multimodal subtask of identifying whether a meme contains persuasion or not. The others are hierarchical multi-label classifications that consider textual content alone or a multimodal setting of text and visual content. This is a multilingual task, and we participated in all three subtasks but we focus only on the English dataset. Our approach is based on a fine-tuning approach with the pre-trained RoBERTa-large model. In addition, for multimodal cases with both textual and visual content, we used the LMM called LlaVa to extract image descriptions and combine them with the meme text. Our system performed well in three subtasks, achieving the tenth best result with an Hierarchical F1 of 64.774{\%}, the fourth best in Subtask 2a with an Hierarchical F1 of 69.003{\%}, and the eighth best in Subtask 2b with a Macro F1 of 78.660{\%}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pan-etal-2024-umuteam">
<titleInfo>
<title>UMUTeam at SemEval-2024 Task 4: Multimodal Identification of Persuasive Techniques in Memes through Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ronghao</namePart>
<namePart type="family">Pan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">José</namePart>
<namePart type="given">Antonio</namePart>
<namePart type="family">García-díaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rafael</namePart>
<namePart type="family">Valencia-garcía</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this manuscript we describe the UMUTeam’s participation in SemEval-2024 Task 4, a shared task to identify different persuasion techniques in memes. The task is divided into three subtasks. One is a multimodal subtask of identifying whether a meme contains persuasion or not. The others are hierarchical multi-label classifications that consider textual content alone or a multimodal setting of text and visual content. This is a multilingual task, and we participated in all three subtasks but we focus only on the English dataset. Our approach is based on a fine-tuning approach with the pre-trained RoBERTa-large model. In addition, for multimodal cases with both textual and visual content, we used the LMM called LlaVa to extract image descriptions and combine them with the meme text. Our system performed well in three subtasks, achieving the tenth best result with an Hierarchical F1 of 64.774%, the fourth best in Subtask 2a with an Hierarchical F1 of 69.003%, and the eighth best in Subtask 2b with a Macro F1 of 78.660%.</abstract>
<identifier type="citekey">pan-etal-2024-umuteam</identifier>
<identifier type="doi">10.18653/v1/2024.semeval-1.96</identifier>
<location>
<url>https://aclanthology.org/2024.semeval-1.96</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>655</start>
<end>666</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T UMUTeam at SemEval-2024 Task 4: Multimodal Identification of Persuasive Techniques in Memes through Large Language Models
%A Pan, Ronghao
%A García-díaz, José Antonio
%A Valencia-garcía, Rafael
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Tayyar Madabushi, Harish
%Y Da San Martino, Giovanni
%Y Rosenthal, Sara
%Y Rosá, Aiala
%S Proceedings of the 18th International Workshop on Semantic Evaluation (SemEval-2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F pan-etal-2024-umuteam
%X In this manuscript we describe the UMUTeam’s participation in SemEval-2024 Task 4, a shared task to identify different persuasion techniques in memes. The task is divided into three subtasks. One is a multimodal subtask of identifying whether a meme contains persuasion or not. The others are hierarchical multi-label classifications that consider textual content alone or a multimodal setting of text and visual content. This is a multilingual task, and we participated in all three subtasks but we focus only on the English dataset. Our approach is based on a fine-tuning approach with the pre-trained RoBERTa-large model. In addition, for multimodal cases with both textual and visual content, we used the LMM called LlaVa to extract image descriptions and combine them with the meme text. Our system performed well in three subtasks, achieving the tenth best result with an Hierarchical F1 of 64.774%, the fourth best in Subtask 2a with an Hierarchical F1 of 69.003%, and the eighth best in Subtask 2b with a Macro F1 of 78.660%.
%R 10.18653/v1/2024.semeval-1.96
%U https://aclanthology.org/2024.semeval-1.96
%U https://doi.org/10.18653/v1/2024.semeval-1.96
%P 655-666
Markdown (Informal)
[UMUTeam at SemEval-2024 Task 4: Multimodal Identification of Persuasive Techniques in Memes through Large Language Models](https://aclanthology.org/2024.semeval-1.96) (Pan et al., SemEval 2024)
ACL