% Conference-paper record; the url and doi fields below both carry the identifier W18-3304.
@inproceedings{choi-etal-2018-convolutional,
  title     = {Convolutional Attention Networks for Multimodal Emotion Recognition from Speech and Text Data},
  author    = {Choi, Woo Yong and Song, Kyu Ye and Lee, Chan Woo},
  editor    = {Zadeh, Amir and Liang, Paul Pu and Morency, Louis-Philippe and Poria, Soujanya and Cambria, Erik and Scherer, Stefan},
  booktitle = {Proceedings of Grand Challenge and Workshop on Human Multimodal Language (Challenge-{HML})},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W18-3304},
  doi       = {10.18653/v1/W18-3304},
  pages     = {28--34},
  abstract  = {Emotion recognition has become a popular topic of interest, especially in the field of human computer interaction. Previous works involve unimodal analysis of emotion, while recent efforts focus on multimodal emotion recognition from vision and speech. In this paper, we propose a new method of learning about the hidden representations between just speech and text data using convolutional attention networks. Compared to the shallow model which employs simple concatenation of feature vectors, the proposed attention model performs much better in classifying emotion from speech and text data contained in the CMU-MOSEI dataset.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="choi-etal-2018-convolutional">
    <titleInfo>
      <title>Convolutional Attention Networks for Multimodal Emotion Recognition from Speech and Text Data</title>
    </titleInfo>

    <!-- Authors of the paper (role "author"), one <name> element each. -->
    <name type="personal">
      <namePart type="given">Woo</namePart>
      <namePart type="given">Yong</namePart>
      <namePart type="family">Choi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kyu</namePart>
      <namePart type="given">Ye</namePart>
      <namePart type="family">Song</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chan</namePart>
      <namePart type="given">Woo</namePart>
      <namePart type="family">Lee</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2018-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of Grand Challenge and Workshop on Human Multimodal Language (Challenge-HML)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Amir</namePart>
        <namePart type="family">Zadeh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Paul</namePart>
        <namePart type="given">Pu</namePart>
        <namePart type="family">Liang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Louis-Philippe</namePart>
        <namePart type="family">Morency</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Soujanya</namePart>
        <namePart type="family">Poria</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Erik</namePart>
        <namePart type="family">Cambria</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stefan</namePart>
        <namePart type="family">Scherer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Melbourne, Australia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>Emotion recognition has become a popular topic of interest, especially in the field of human computer interaction. Previous works involve unimodal analysis of emotion, while recent efforts focus on multimodal emotion recognition from vision and speech. In this paper, we propose a new method of learning about the hidden representations between just speech and text data using convolutional attention networks. Compared to the shallow model which employs simple concatenation of feature vectors, the proposed attention model performs much better in classifying emotion from speech and text data contained in the CMU-MOSEI dataset.</abstract>

    <!-- Identifiers and access location for the paper. -->
    <identifier type="citekey">choi-etal-2018-convolutional</identifier>
    <identifier type="doi">10.18653/v1/W18-3304</identifier>
    <location>
      <url>https://aclanthology.org/W18-3304</url>
    </location>

    <!-- Position of the paper within the host volume: date and page range. -->
    <part>
      <date>2018-07</date>
      <extent unit="page">
        <start>28</start>
        <end>34</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Convolutional Attention Networks for Multimodal Emotion Recognition from Speech and Text Data
%A Choi, Woo Yong
%A Song, Kyu Ye
%A Lee, Chan Woo
%Y Zadeh, Amir
%Y Liang, Paul Pu
%Y Morency, Louis-Philippe
%Y Poria, Soujanya
%Y Cambria, Erik
%Y Scherer, Stefan
%S Proceedings of Grand Challenge and Workshop on Human Multimodal Language (Challenge-HML)
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F choi-etal-2018-convolutional
%X Emotion recognition has become a popular topic of interest, especially in the field of human computer interaction. Previous works involve unimodal analysis of emotion, while recent efforts focus on multimodal emotion recognition from vision and speech. In this paper, we propose a new method of learning about the hidden representations between just speech and text data using convolutional attention networks. Compared to the shallow model which employs simple concatenation of feature vectors, the proposed attention model performs much better in classifying emotion from speech and text data contained in the CMU-MOSEI dataset.
%R 10.18653/v1/W18-3304
%U https://aclanthology.org/W18-3304
%U https://doi.org/10.18653/v1/W18-3304
%P 28-34
Markdown (Informal)
[Convolutional Attention Networks for Multimodal Emotion Recognition from Speech and Text Data](https://aclanthology.org/W18-3304) (Choi et al., ACL 2018)
ACL