@InProceedings{shen-EtAl:2018:Long1,
  author    = {Shen, Dinghan  and  Wang, Guoyin  and  Wang, Wenlin  and  Renqiang Min, Martin  and  Su, Qinliang  and  Zhang, Yizhe  and  Li, Chunyuan  and  Henao, Ricardo  and  Carin, Lawrence},
  title     = {Baseline Needs More Love: On Simple Word-Embedding-Based Models and Associated Pooling Mechanisms},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  pages     = {440--450},
  abstract  = {Many deep learning architectures have been proposed to model the \emph{compositionality} in text sequences, requiring substantial number of parameters and expensive computations. However, there has not been a rigorous evaluation regarding the added value of sophisticated compositional functions. In this paper, we conduct a point-by-point comparative study between Simple Word-Embedding-based Models (SWEMs), consisting of parameter-free pooling operations, relative to word-embedding-based RNN/CNN models. Surprisingly, SWEMs exhibit comparable or even superior performance in the majority of cases considered. Based upon this understanding, we propose two additional pooling strategies over learned word embeddings: ($i$) a max-pooling operation for improved interpretability; and ($ii$) a hierarchical pooling operation, which preserves spatial ($n$-gram) information within text sequences. We present experiments on 17 datasets encompassing three tasks: ($i$) (long) document classification; ($ii$) text sequence matching; and ($iii$) short text tasks, including classification and tagging.},
  doi       = {10.18653/v1/P18-1041},
  url       = {https://aclanthology.org/P18-1041}
}

