@inproceedings{yang-etal-2024-clgsi,
title = "{CLGSI}: A Multimodal Sentiment Analysis Framework based on Contrastive Learning Guided by Sentiment Intensity",
author = "Yang, Yang and
Dong, Xunde and
Qiang, Yupeng",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2024",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-naacl.135",
doi = "10.18653/v1/2024.findings-naacl.135",
pages = "2099--2110",
abstract = "Recently, contrastive learning has begun to gain popularity in multimodal sentiment analysis (MSA). However, most of existing MSA methods based on contrastive learning lacks more detailed learning of the distribution of sample pairs with different sentiment intensity differences in the contrastive learning representation space. In addition, limited research has been conducted on the fusion of each modality representation obtained by contrastive learning training.In this paper, we propose a novel framework for multimodal sentiment analysis based on Contrastive Learning Guided by Sentiment Intensity (CLGSI). Firstly, the proposed contrastive learning guided by sentiment intensity selects positive and negative sample pairs based on the difference in sentiment intensity and assigns corresponding weights accordingly.Subsequently, we propose a new multimodal representation fusion mechanism, called Global-Local-Fine-Knowledge (GLFK), which extracts common features between different modalities{'} representations. At the same time, each unimodal encoder output is separately processed by a Multilayer Perceptron (MLP) to extract specific features of each modality. Finally, joint learning of the common and specific features is used to predict sentiment intensity. The effectiveness of CLGSI is assessed on two English datasets, MOSI and MOSEI, as well as one Chinese dataset, SIMS. We achieve competitive experimental results, which attest to the strong generalization performance of our approach. The code for our approach will be released in https://github.com/AZYoung233/CLGSI",
}
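A minimal sketch of the idea described in the abstract, assuming a supervised-contrastive-style objective: positive and negative pairs are selected by the absolute difference in sentiment intensity, and pairs are weighted by that difference. The threshold tau, the 1 + diff weighting, the temperature, and the function name are illustrative assumptions, not values or code from the paper or its released repository.

# Illustrative sketch (assumption, not the authors' released code): a contrastive
# loss in which positive/negative pairs are chosen by sentiment-intensity
# difference and weighted by that difference, per the abstract's description.
import torch
import torch.nn.functional as F

def intensity_guided_contrastive_loss(z, intensity, tau=0.5, temperature=0.1):
    """z: (N, D) sample representations; intensity: (N,) sentiment intensity labels."""
    z = F.normalize(z, dim=-1)
    sim = z @ z.t() / temperature                        # pairwise cosine-similarity logits
    diff = (intensity[:, None] - intensity[None, :]).abs()
    eye = torch.eye(len(z), device=z.device)
    pos_mask = (diff <= tau).float() * (1.0 - eye)       # close intensity -> positive pair
    weights = (1.0 + diff) * (1.0 - eye)                 # larger intensity gap -> larger weight
    denom = (weights * sim.exp()).sum(dim=1, keepdim=True) + 1e-8
    log_prob = sim - denom.log()
    n_pos = pos_mask.sum(dim=1).clamp(min=1.0)
    return -(pos_mask * log_prob).sum(dim=1).div(n_pos).mean()

# Example with random fused representations and MOSI-style scores in [-3, 3].
z = torch.randn(8, 16)
intensity = torch.rand(8) * 6.0 - 3.0
print(intensity_guided_contrastive_loss(z, intensity))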
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yang-etal-2024-clgsi">
<titleInfo>
<title>CLGSI: A Multimodal Sentiment Analysis Framework based on Contrastive Learning Guided by Sentiment Intensity</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xunde</namePart>
<namePart type="family">Dong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yupeng</namePart>
<namePart type="family">Qiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recently, contrastive learning has begun to gain popularity in multimodal sentiment analysis (MSA). However, most existing MSA methods based on contrastive learning lack detailed modeling of how sample pairs with different sentiment intensity differences are distributed in the contrastive learning representation space. In addition, limited research has been conducted on fusing the modality representations obtained through contrastive learning. In this paper, we propose a novel framework for multimodal sentiment analysis based on Contrastive Learning Guided by Sentiment Intensity (CLGSI). Firstly, the proposed contrastive learning guided by sentiment intensity selects positive and negative sample pairs based on the difference in sentiment intensity and assigns corresponding weights accordingly. Subsequently, we propose a new multimodal representation fusion mechanism, called Global-Local-Fine-Knowledge (GLFK), which extracts common features across different modalities’ representations. At the same time, each unimodal encoder output is separately processed by a Multilayer Perceptron (MLP) to extract modality-specific features. Finally, joint learning of the common and specific features is used to predict sentiment intensity. The effectiveness of CLGSI is assessed on two English datasets, MOSI and MOSEI, as well as one Chinese dataset, SIMS. We achieve competitive experimental results, which attest to the strong generalization performance of our approach. The code for our approach will be released at https://github.com/AZYoung233/CLGSI</abstract>
<identifier type="citekey">yang-etal-2024-clgsi</identifier>
<identifier type="doi">10.18653/v1/2024.findings-naacl.135</identifier>
<location>
<url>https://aclanthology.org/2024.findings-naacl.135</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>2099</start>
<end>2110</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CLGSI: A Multimodal Sentiment Analysis Framework based on Contrastive Learning Guided by Sentiment Intensity
%A Yang, Yang
%A Dong, Xunde
%A Qiang, Yupeng
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Findings of the Association for Computational Linguistics: NAACL 2024
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F yang-etal-2024-clgsi
%X Recently, contrastive learning has begun to gain popularity in multimodal sentiment analysis (MSA). However, most existing MSA methods based on contrastive learning lack detailed modeling of how sample pairs with different sentiment intensity differences are distributed in the contrastive learning representation space. In addition, limited research has been conducted on fusing the modality representations obtained through contrastive learning. In this paper, we propose a novel framework for multimodal sentiment analysis based on Contrastive Learning Guided by Sentiment Intensity (CLGSI). Firstly, the proposed contrastive learning guided by sentiment intensity selects positive and negative sample pairs based on the difference in sentiment intensity and assigns corresponding weights accordingly. Subsequently, we propose a new multimodal representation fusion mechanism, called Global-Local-Fine-Knowledge (GLFK), which extracts common features across different modalities’ representations. At the same time, each unimodal encoder output is separately processed by a Multilayer Perceptron (MLP) to extract modality-specific features. Finally, joint learning of the common and specific features is used to predict sentiment intensity. The effectiveness of CLGSI is assessed on two English datasets, MOSI and MOSEI, as well as one Chinese dataset, SIMS. We achieve competitive experimental results, which attest to the strong generalization performance of our approach. The code for our approach will be released at https://github.com/AZYoung233/CLGSI
%R 10.18653/v1/2024.findings-naacl.135
%U https://aclanthology.org/2024.findings-naacl.135
%U https://doi.org/10.18653/v1/2024.findings-naacl.135
%P 2099-2110
Markdown (Informal)
[CLGSI: A Multimodal Sentiment Analysis Framework based on Contrastive Learning Guided by Sentiment Intensity](https://aclanthology.org/2024.findings-naacl.135) (Yang et al., Findings 2024)
ACL
Yang Yang, Xunde Dong, and Yupeng Qiang. 2024. CLGSI: A Multimodal Sentiment Analysis Framework based on Contrastive Learning Guided by Sentiment Intensity. In Findings of the Association for Computational Linguistics: NAACL 2024, pages 2099–2110, Mexico City, Mexico. Association for Computational Linguistics.