% Conference paper (EMNLP 2017) introducing the Tensor Fusion Network model
% for multimodal sentiment analysis.
%   - month uses the predefined three-letter macro so styles can
%     abbreviate or localise it.
%   - the model name in the title is brace-protected so sentence-casing
%     styles do not lowercase it.
%   - address holds the conference venue, as in the original export.
@inproceedings{zadeh-EtAl:2017:EMNLP2017,
  author    = {Zadeh, Amir and Chen, Minghai and Poria, Soujanya and Cambria, Erik and Morency, Louis-Philippe},
  title     = {{Tensor Fusion Network} for Multimodal Sentiment Analysis},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  year      = {2017},
  month     = sep,
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {1103--1114},
  doi       = {10.18653/v1/D17-1115},
  url       = {https://www.aclweb.org/anthology/D17-1115},
  abstract  = {Multimodal sentiment analysis is an increasingly popular research area, which
               extends the conventional language-based definition of sentiment analysis to a
               multimodal setup where other relevant modalities accompany language. In this
               paper, we pose the problem of multimodal sentiment analysis as modeling
               intra-modality and inter-modality dynamics. We introduce a novel model, termed
               Tensor Fusion Networks, which learns both such dynamics end-to-end. The
               proposed approach is tailored for the volatile nature of spoken language in
               online videos as well as accompanying gestures and voice. In the experiments,
               our model outperforms state-of-the-art approaches for both multimodal and
               unimodal sentiment analysis.},
}