@inproceedings{beard-EtAl:2018:K18-1,
  author    = {Beard, Rory  and  Das, Ritwik  and  Ng, Raymond W. M.  and  Gopalakrishnan, P. G. Keerthana  and  Eerens, Luka  and  Swietojanski, Pawel  and  Miksik, Ondrej},
  title     = {Multi-Modal Sequence Fusion via Recursive Attention for Emotion Recognition},
  booktitle = {Proceedings of the 22nd Conference on Computational Natural Language Learning},
  month     = oct,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  pages     = {251--259},
  abstract  = {Natural human communication is nuanced and inherently multi-modal. Humans possess specialised sensoria for processing vocal, visual, and linguistic, and para-linguistic information, but form an intricately fused percept of the multi-modal data stream to provide a holistic representation. Analysis of emotional content in face-to-face communication is a cognitive task to which humans are particularly attuned, given its sociological importance, and poses a difficult challenge for machine emulation due to the subtlety and expressive variability of cross-modal cues. Inspired by the empirical success of recent so-called End-To-End Memory Networks and related works, we propose an approach based on recursive multi-attention with a shared external memory updated over multiple gated iterations of analysis. We evaluate our model across several large multi-modal datasets and show that global contextualised memory with gated memory update can effectively achieve emotion recognition.},
  url       = {https://aclanthology.org/K18-1025},
  doi       = {10.18653/v1/K18-1025},
}

