@inproceedings{gu-etal-2022-mmvae,
title = "{MMVAE} at {S}em{E}val-2022 Task 5: A Multi-modal Multi-task {VAE} on Misogynous Meme Detection",
author = "Gu, Yimeng and
Castro, Ignacio and
Tyson, Gareth",
editor = "Emerson, Guy and
Schluter, Natalie and
Stanovsky, Gabriel and
Kumar, Ritesh and
Palmer, Alexis and
Schneider, Nathan and
Singh, Siddharth and
Ratan, Shyam",
booktitle = "Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.semeval-1.96",
doi = "10.18653/v1/2022.semeval-1.96",
pages = "700--710",
abstract = "Nowadays, memes have become quite common in day-to-day communications on social media platforms. They appear to be amusing, evoking and attractive to audiences. However, some memes containing malicious contents can be harmful to the targeted group and arouse public anger in the long run. In this paper, we study misogynous meme detection, a shared task in SemEval 2022 - Multimedia Automatic Misogyny Identification (MAMI). The challenge of misogynous meme detection is to co-represent multi-modal features. To tackle with this challenge, we propose a Multi-modal Multi-task Variational AutoEncoder (MMVAE) to learn an effective co-representation of visual and textual features in the latent space, and determine if the meme contains misogynous information and identify its fine-grained categories. Our model achieves 0.723 on sub-task A and 0.634 on sub-task B in terms of $F_{1}$ scores. We carry out comprehensive experiments on our model{'}s architecture and show that our approach significantly outperforms several strong uni-modal and multi-modal approaches. Our code is released on github.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gu-etal-2022-mmvae">
<titleInfo>
<title>MMVAE at SemEval-2022 Task 5: A Multi-modal Multi-task VAE on Misogynous Meme Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yimeng</namePart>
<namePart type="family">Gu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ignacio</namePart>
<namePart type="family">Castro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gareth</namePart>
<namePart type="family">Tyson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Guy</namePart>
<namePart type="family">Emerson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalie</namePart>
<namePart type="family">Schluter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Stanovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathan</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siddharth</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shyam</namePart>
<namePart type="family">Ratan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Nowadays, memes have become quite common in day-to-day communications on social media platforms. They appear to be amusing, evoking and attractive to audiences. However, some memes containing malicious contents can be harmful to the targeted group and arouse public anger in the long run. In this paper, we study misogynous meme detection, a shared task in SemEval 2022 - Multimedia Automatic Misogyny Identification (MAMI). The challenge of misogynous meme detection is to co-represent multi-modal features. To tackle with this challenge, we propose a Multi-modal Multi-task Variational AutoEncoder (MMVAE) to learn an effective co-representation of visual and textual features in the latent space, and determine if the meme contains misogynous information and identify its fine-grained categories. Our model achieves 0.723 on sub-task A and 0.634 on sub-task B in terms of F₁ scores. We carry out comprehensive experiments on our model’s architecture and show that our approach significantly outperforms several strong uni-modal and multi-modal approaches. Our code is released on github.</abstract>
<identifier type="citekey">gu-etal-2022-mmvae</identifier>
<identifier type="doi">10.18653/v1/2022.semeval-1.96</identifier>
<location>
<url>https://aclanthology.org/2022.semeval-1.96</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>700</start>
<end>710</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MMVAE at SemEval-2022 Task 5: A Multi-modal Multi-task VAE on Misogynous Meme Detection
%A Gu, Yimeng
%A Castro, Ignacio
%A Tyson, Gareth
%Y Emerson, Guy
%Y Schluter, Natalie
%Y Stanovsky, Gabriel
%Y Kumar, Ritesh
%Y Palmer, Alexis
%Y Schneider, Nathan
%Y Singh, Siddharth
%Y Ratan, Shyam
%S Proceedings of the 16th International Workshop on Semantic Evaluation (SemEval-2022)
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F gu-etal-2022-mmvae
%X Nowadays, memes have become quite common in day-to-day communications on social media platforms. They appear to be amusing, evoking and attractive to audiences. However, some memes containing malicious contents can be harmful to the targeted group and arouse public anger in the long run. In this paper, we study misogynous meme detection, a shared task in SemEval 2022 - Multimedia Automatic Misogyny Identification (MAMI). The challenge of misogynous meme detection is to co-represent multi-modal features. To tackle with this challenge, we propose a Multi-modal Multi-task Variational AutoEncoder (MMVAE) to learn an effective co-representation of visual and textual features in the latent space, and determine if the meme contains misogynous information and identify its fine-grained categories. Our model achieves 0.723 on sub-task A and 0.634 on sub-task B in terms of F₁ scores. We carry out comprehensive experiments on our model’s architecture and show that our approach significantly outperforms several strong uni-modal and multi-modal approaches. Our code is released on github.
%R 10.18653/v1/2022.semeval-1.96
%U https://aclanthology.org/2022.semeval-1.96
%U https://doi.org/10.18653/v1/2022.semeval-1.96
%P 700-710
Markdown (Informal)
[MMVAE at SemEval-2022 Task 5: A Multi-modal Multi-task VAE on Misogynous Meme Detection](https://aclanthology.org/2022.semeval-1.96) (Gu et al., SemEval 2022)
ACL