@InProceedings{gu-EtAl:2018:Long,
  author    = {Gu, Yue and Yang, Kangning and Fu, Shiyu and Chen, Shuhong and Li, Xinyu and Marsic, Ivan},
  title     = {Multimodal Affective Analysis Using Hierarchical Attention Strategy with Word-Level Alignment},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  pages     = {2225--2235},
  abstract  = {Multimodal affective computing, learning to recognize and interpret human affect and subjective information from multiple data sources, is still a challenge because: (i) it is hard to extract informative features to represent human affects from heterogeneous inputs; (ii) current fusion strategies only fuse different modalities at abstract levels, ignoring time-dependent interactions between modalities. Addressing such issues, we introduce a hierarchical multimodal architecture with attention and word-level fusion to classify utterance-level sentiment and emotion from text and audio data. Our introduced model outperforms state-of-the-art approaches on published datasets, and we demonstrate that our model is able to visualize and interpret synchronized attention over modalities.},
  url       = {https://www.aclweb.org/anthology/P18-1207}
}

