@inproceedings{sharma-etal-2022-r2d2,
title = "{R}2{D}2 at {S}em{E}val-2022 Task 5: Attention is only as good as its Values! A multimodal system for identifying misogynist memes",
author = "Sharma, Mayukh and
Kandasamy, Ilanthenral and
W B, Vasantha",
editor = "Emerson, Guy and
Schluter, Natalie and
Stanovsky, Gabriel and
Kumar, Ritesh and
Palmer, Alexis and
Schneider, Nathan and
Singh, Siddharth and
Ratan, Shyam",
booktitle = "Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.semeval-1.106",
doi = "10.18653/v1/2022.semeval-1.106",
pages = "761--770",
abstract = "This paper describes the multimodal deep learning system proposed for SemEval 2022 Task 5: MAMI - Multimedia Automatic Misogyny Identification. We participated in both Subtasks, i.e. Subtask A: Misogynous meme identification, and Subtask B: Identifying type of misogyny among potential overlapping categories (stereotype, shaming, objectification, violence). The proposed architecture uses pre-trained models as feature extractors for text and images. We use these features to learn multimodal representation using methods like concatenation and scaled dot product attention. Classification layers are used on fused features as per the subtask definition. We also performed experiments using unimodal models for setting up comparative baselines. Our best performing system achieved an F1 score of 0.757 and was ranked $3^{rd}$ in Subtask A. On Subtask B, our system performed well with an F1 score of 0.690 and was ranked $10^{th}$ on the leaderboard. We further show extensive experiments using combinations of different pre-trained models which will be helpful as baselines for future work.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sharma-etal-2022-r2d2">
<titleInfo>
<title>R2D2 at SemEval-2022 Task 5: Attention is only as good as its Values! A multimodal system for identifying misogynist memes</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mayukh</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilanthenral</namePart>
<namePart type="family">Kandasamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasantha</namePart>
<namePart type="family">W B</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Guy</namePart>
<namePart type="family">Emerson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalie</namePart>
<namePart type="family">Schluter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Stanovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathan</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siddharth</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shyam</namePart>
<namePart type="family">Ratan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the multimodal deep learning system proposed for SemEval 2022 Task 5: MAMI - Multimedia Automatic Misogyny Identification. We participated in both Subtasks, i.e. Subtask A: Misogynous meme identification, and Subtask B: Identifying type of misogyny among potential overlapping categories (stereotype, shaming, objectification, violence). The proposed architecture uses pre-trained models as feature extractors for text and images. We use these features to learn multimodal representation using methods like concatenation and scaled dot product attention. Classification layers are used on fused features as per the subtask definition. We also performed experiments using unimodal models for setting up comparative baselines. Our best performing system achieved an F1 score of 0.757 and was ranked 3^rd in Subtask A. On Subtask B, our system performed well with an F1 score of 0.690 and was ranked 10^th on the leaderboard. We further show extensive experiments using combinations of different pre-trained models which will be helpful as baselines for future work.</abstract>
<identifier type="citekey">sharma-etal-2022-r2d2</identifier>
<identifier type="doi">10.18653/v1/2022.semeval-1.106</identifier>
<location>
<url>https://aclanthology.org/2022.semeval-1.106</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>761</start>
<end>770</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T R2D2 at SemEval-2022 Task 5: Attention is only as good as its Values! A multimodal system for identifying misogynist memes
%A Sharma, Mayukh
%A Kandasamy, Ilanthenral
%A W B, Vasantha
%Y Emerson, Guy
%Y Schluter, Natalie
%Y Stanovsky, Gabriel
%Y Kumar, Ritesh
%Y Palmer, Alexis
%Y Schneider, Nathan
%Y Singh, Siddharth
%Y Ratan, Shyam
%S Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F sharma-etal-2022-r2d2
%X This paper describes the multimodal deep learning system proposed for SemEval 2022 Task 5: MAMI - Multimedia Automatic Misogyny Identification. We participated in both Subtasks, i.e. Subtask A: Misogynous meme identification, and Subtask B: Identifying type of misogyny among potential overlapping categories (stereotype, shaming, objectification, violence). The proposed architecture uses pre-trained models as feature extractors for text and images. We use these features to learn multimodal representation using methods like concatenation and scaled dot product attention. Classification layers are used on fused features as per the subtask definition. We also performed experiments using unimodal models for setting up comparative baselines. Our best performing system achieved an F1 score of 0.757 and was ranked 3^rd in Subtask A. On Subtask B, our system performed well with an F1 score of 0.690 and was ranked 10^th on the leaderboard. We further show extensive experiments using combinations of different pre-trained models which will be helpful as baselines for future work.
%R 10.18653/v1/2022.semeval-1.106
%U https://aclanthology.org/2022.semeval-1.106
%U https://doi.org/10.18653/v1/2022.semeval-1.106
%P 761-770
Markdown (Informal)
[R2D2 at SemEval-2022 Task 5: Attention is only as good as its Values! A multimodal system for identifying misogynist memes](https://aclanthology.org/2022.semeval-1.106) (Sharma et al., SemEval 2022)
ACL