@inproceedings{beard-etal-2018-multi,
title = "Multi-Modal Sequence Fusion via Recursive Attention for Emotion Recognition",
author = "Beard, Rory and
Das, Ritwik and
Ng, Raymond W. M. and
Gopalakrishnan, P. G. Keerthana and
Eerens, Luka and
Swietojanski, Pawel and
Miksik, Ondrej",
editor = "Korhonen, Anna and
Titov, Ivan",
booktitle = "Proceedings of the 22nd Conference on Computational Natural Language Learning",
month = oct,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/K18-1025",
doi = "10.18653/v1/K18-1025",
pages = "251--259",
abstract = "Natural human communication is nuanced and inherently multi-modal. Humans possess specialised sensoria for processing vocal, visual, and linguistic, and para-linguistic information, but form an intricately fused percept of the multi-modal data stream to provide a holistic representation. Analysis of emotional content in face-to-face communication is a cognitive task to which humans are particularly attuned, given its sociological importance, and poses a difficult challenge for machine emulation due to the subtlety and expressive variability of cross-modal cues. Inspired by the empirical success of recent so-called End-To-End Memory Networks and related works, we propose an approach based on recursive multi-attention with a shared external memory updated over multiple gated iterations of analysis. We evaluate our model across several large multi-modal datasets and show that global contextualised memory with gated memory update can effectively achieve emotion recognition.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="beard-etal-2018-multi">
<titleInfo>
<title>Multi-Modal Sequence Fusion via Recursive Attention for Emotion Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rory</namePart>
<namePart type="family">Beard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritwik</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raymond</namePart>
<namePart type="given">W</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">P</namePart>
<namePart type="given">G</namePart>
<namePart type="given">Keerthana</namePart>
<namePart type="family">Gopalakrishnan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luka</namePart>
<namePart type="family">Eerens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pawel</namePart>
<namePart type="family">Swietojanski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondrej</namePart>
<namePart type="family">Miksik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Conference on Computational Natural Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Titov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Natural human communication is nuanced and inherently multi-modal. Humans possess specialised sensoria for processing vocal, visual, linguistic, and para-linguistic information, but form an intricately fused percept of the multi-modal data stream to provide a holistic representation. Analysis of emotional content in face-to-face communication is a cognitive task to which humans are particularly attuned, given its sociological importance, and poses a difficult challenge for machine emulation due to the subtlety and expressive variability of cross-modal cues. Inspired by the empirical success of recent so-called End-To-End Memory Networks and related works, we propose an approach based on recursive multi-attention with a shared external memory updated over multiple gated iterations of analysis. We evaluate our model across several large multi-modal datasets and show that global contextualised memory with gated memory update can effectively achieve emotion recognition.</abstract>
<identifier type="citekey">beard-etal-2018-multi</identifier>
<identifier type="doi">10.18653/v1/K18-1025</identifier>
<location>
<url>https://aclanthology.org/K18-1025</url>
</location>
<part>
<date>2018-10</date>
<extent unit="page">
<start>251</start>
<end>259</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-Modal Sequence Fusion via Recursive Attention for Emotion Recognition
%A Beard, Rory
%A Das, Ritwik
%A Ng, Raymond W. M.
%A Gopalakrishnan, P. G. Keerthana
%A Eerens, Luka
%A Swietojanski, Pawel
%A Miksik, Ondrej
%Y Korhonen, Anna
%Y Titov, Ivan
%S Proceedings of the 22nd Conference on Computational Natural Language Learning
%D 2018
%8 October
%I Association for Computational Linguistics
%C Brussels, Belgium
%F beard-etal-2018-multi
%X Natural human communication is nuanced and inherently multi-modal. Humans possess specialised sensoria for processing vocal, visual, linguistic, and para-linguistic information, but form an intricately fused percept of the multi-modal data stream to provide a holistic representation. Analysis of emotional content in face-to-face communication is a cognitive task to which humans are particularly attuned, given its sociological importance, and poses a difficult challenge for machine emulation due to the subtlety and expressive variability of cross-modal cues. Inspired by the empirical success of recent so-called End-To-End Memory Networks and related works, we propose an approach based on recursive multi-attention with a shared external memory updated over multiple gated iterations of analysis. We evaluate our model across several large multi-modal datasets and show that global contextualised memory with gated memory update can effectively achieve emotion recognition.
%R 10.18653/v1/K18-1025
%U https://aclanthology.org/K18-1025
%U https://doi.org/10.18653/v1/K18-1025
%P 251-259
Markdown (Informal)
[Multi-Modal Sequence Fusion via Recursive Attention for Emotion Recognition](https://aclanthology.org/K18-1025) (Beard et al., CoNLL 2018)
ACL
Rory Beard, Ritwik Das, Raymond W. M. Ng, P. G. Keerthana Gopalakrishnan, Luka Eerens, Pawel Swietojanski, and Ondrej Miksik. 2018. Multi-Modal Sequence Fusion via Recursive Attention for Emotion Recognition. In Proceedings of the 22nd Conference on Computational Natural Language Learning, pages 251–259, Brussels, Belgium. Association for Computational Linguistics.
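The abstract describes recursive multi-attention over a shared external memory that is refined through gated update iterations. The snippet below is only a minimal illustrative sketch of that general idea, not the authors' released model: the modality projections, bilinear attention scoring, gating form, hop count, and final classifier are all assumptions chosen for demonstration, using NumPy only.

```python
# Illustrative sketch (assumed architecture, not the paper's implementation):
# attention over concatenated multi-modal features, read into a shared memory,
# with a gated memory update repeated for a fixed number of hops.
import numpy as np

rng = np.random.default_rng(0)

def softmax(x, axis=-1):
    x = x - x.max(axis=axis, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

def recursive_gated_attention(modal_feats, n_hops=3, d_mem=64, n_classes=6):
    """modal_feats: list of (T_i, d_i) arrays, one per modality (e.g. audio, vision, text)."""
    # Project each modality into a shared d_mem-dimensional space, concatenate along time.
    feats = np.concatenate(
        [f @ rng.normal(scale=0.1, size=(f.shape[1], d_mem)) for f in modal_feats],
        axis=0,
    )  # (T_total, d_mem)

    W_att = rng.normal(scale=0.1, size=(d_mem, d_mem))    # bilinear attention scoring
    W_z = rng.normal(scale=0.1, size=(2 * d_mem, d_mem))  # update-gate weights
    W_c = rng.normal(scale=0.1, size=(2 * d_mem, d_mem))  # candidate-memory weights
    W_out = rng.normal(scale=0.1, size=(d_mem, n_classes))

    m = np.zeros(d_mem)  # shared external memory, refined over hops
    for _ in range(n_hops):
        scores = feats @ (W_att @ m)            # relevance of each time step to the memory
        alpha = softmax(scores)                 # attention distribution over all modalities
        r = alpha @ feats                       # attention read vector
        zm = np.concatenate([m, r])
        z = 1.0 / (1.0 + np.exp(-(zm @ W_z)))   # gate: how much of the memory to overwrite
        c = np.tanh(zm @ W_c)                   # candidate memory content
        m = z * c + (1.0 - z) * m               # gated memory update

    return softmax(m @ W_out)                   # emotion-class distribution

# Toy usage: three modalities with different sequence lengths and feature sizes.
audio = rng.normal(size=(50, 40))
vision = rng.normal(size=(30, 35))
text = rng.normal(size=(20, 300))
print(recursive_gated_attention([audio, vision, text]))
```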