% Workshop paper record: one @inproceedings entry, values brace-delimited,
% names in "Last, First" form joined by " and ", month given as a bare macro.
@inproceedings{sourav-ouyang-2021-lightweight,
  author    = {Sourav, Soumya and
               Ouyang, Jessica},
  title     = {Lightweight Models for Multimodal Sequential Data},
  editor    = {De Clercq, Orphee and
               Balahur, Alexandra and
               Sedoc, Joao and
               Barriere, Valentin and
               Tafreshi, Shabnam and
               Buechel, Sven and
               Hoste, Veronique},
  booktitle = {Proceedings of the Eleventh Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  month     = apr,
  year      = {2021},
  pages     = {129--137},
  url       = {https://aclanthology.org/2021.wassa-1.14/},
  abstract  = {Human language encompasses more than just text; it also conveys emotions through tone and gestures. We present a case study of three simple and efficient Transformer-based architectures for predicting sentiment and emotion in multimodal data. The Late Fusion model merges unimodal features to create a multimodal feature sequence, the Round Robin model iteratively combines bimodal features using cross-modal attention, and the Hybrid Fusion model combines trimodal and unimodal features together to form a final feature sequence for predicting sentiment. Our experiments show that our small models are effective and outperform the publicly released versions of much larger, state-of-the-art multimodal sentiment analysis systems.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS collection holding a single record for one workshop paper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sourav-ouyang-2021-lightweight">
    <!-- Paper title -->
    <titleInfo>
      <title>Lightweight Models for Multimodal Sequential Data</title>
    </titleInfo>
    <!-- Paper authors, in byline order -->
    <name type="personal">
      <namePart type="given">Soumya</namePart>
      <namePart type="family">Sourav</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jessica</namePart>
      <namePart type="family">Ouyang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Eleventh Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Orphee</namePart>
        <namePart type="family">De Clercq</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alexandra</namePart>
        <namePart type="family">Balahur</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joao</namePart>
        <namePart type="family">Sedoc</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Valentin</namePart>
        <namePart type="family">Barriere</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shabnam</namePart>
        <namePart type="family">Tafreshi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sven</namePart>
        <namePart type="family">Buechel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Human language encompasses more than just text; it also conveys emotions through tone and gestures. We present a case study of three simple and efficient Transformer-based architectures for predicting sentiment and emotion in multimodal data. The Late Fusion model merges unimodal features to create a multimodal feature sequence, the Round Robin model iteratively combines bimodal features using cross-modal attention, and the Hybrid Fusion model combines trimodal and unimodal features together to form a final feature sequence for predicting sentiment. Our experiments show that our small models are effective and outperform the publicly released versions of much larger, state-of-the-art multimodal sentiment analysis systems.</abstract>
    <!-- Citation key and landing-page URL -->
    <identifier type="citekey">sourav-ouyang-2021-lightweight</identifier>
    <location>
      <url>https://aclanthology.org/2021.wassa-1.14/</url>
    </location>
    <!-- Position of the paper inside the host volume -->
    <part>
      <date>2021-04</date>
      <extent unit="page">
        <start>129</start>
        <end>137</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Lightweight Models for Multimodal Sequential Data
%A Sourav, Soumya
%A Ouyang, Jessica
%Y De Clercq, Orphee
%Y Balahur, Alexandra
%Y Sedoc, Joao
%Y Barriere, Valentin
%Y Tafreshi, Shabnam
%Y Buechel, Sven
%Y Hoste, Veronique
%S Proceedings of the Eleventh Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis
%D 2021
%8 April
%I Association for Computational Linguistics
%C Online
%F sourav-ouyang-2021-lightweight
%X Human language encompasses more than just text; it also conveys emotions through tone and gestures. We present a case study of three simple and efficient Transformer-based architectures for predicting sentiment and emotion in multimodal data. The Late Fusion model merges unimodal features to create a multimodal feature sequence, the Round Robin model iteratively combines bimodal features using cross-modal attention, and the Hybrid Fusion model combines trimodal and unimodal features together to form a final feature sequence for predicting sentiment. Our experiments show that our small models are effective and outperform the publicly released versions of much larger, state-of-the-art multimodal sentiment analysis systems.
%U https://aclanthology.org/2021.wassa-1.14/
%P 129-137
Markdown (Informal)
[Lightweight Models for Multimodal Sequential Data](https://aclanthology.org/2021.wassa-1.14/) (Sourav & Ouyang, WASSA 2021)
ACL
- Soumya Sourav and Jessica Ouyang. 2021. Lightweight Models for Multimodal Sequential Data. In Proceedings of the Eleventh Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis, pages 129–137, Online. Association for Computational Linguistics.