@inproceedings{zhou-EtAl:2016:COLING2,
  author    = {Zhou, Peng  and  Qi, Zhenyu  and  Zheng, Suncong  and  Xu, Jiaming  and  Bao, Hongyun  and  Xu, Bo},
  title     = {Text Classification Improved by Integrating Bidirectional {LSTM} with Two-dimensional Max Pooling},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3485--3495},
  abstract  = {Recurrent Neural Network (RNN) is one of the most popular architectures used in
	Natural Language Processing (NLP) tasks because its recurrent structure is
	very suitable to process variable-length text. RNN can utilize distributed
	representations of words by first converting the tokens comprising each text
	into vectors, which form a matrix. And this matrix includes two dimensions: the
	time-step dimension and the feature vector dimension. Then most existing models
	usually utilize one-dimensional (1D) max pooling operation or attention-based
	operation only on the time-step dimension to obtain a fixed-length vector.
	However, the features on the feature vector dimension are not mutually
	independent, and simply applying 1D pooling operation over the time-step
	dimension independently may destroy the structure of the feature
	representation. On the other hand, applying two-dimensional (2D) pooling
	operation over the two dimensions may sample more meaningful features for
	sequence modeling tasks. To integrate the features on both dimensions of the
	matrix, this paper explores applying 2D max pooling operation to obtain a
	fixed-length representation of the text. This paper also utilizes 2D
	convolution to sample more meaningful information of the matrix. Experiments
	are conducted on six text classification tasks, including sentiment analysis,
	question classification, subjectivity classification and newsgroup
	classification. Compared with the state-of-the-art models, the proposed models
	achieve excellent performance on 4 out of 6 tasks. Specifically, one of the
	proposed models achieves highest accuracy on Stanford Sentiment Treebank binary
	classification and fine-grained classification tasks.},
  url       = {http://aclweb.org/anthology/C16-1329}
}

