% EACL 2017 short paper (Li and Mak): DV-LSTM document vectors.
% Notes: month uses the predefined macro so styles can localise it; the
% acronym in the title is brace-protected against sentence-casing styles.
% The address field holds the conference venue, as exported in the source data.
@InProceedings{li-mak:2017:EACLshort,
  author    = {Li, Wei and Mak, Brian},
  title     = {Derivation of Document Vectors from Adaptation of {LSTM} Language Model},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {456--461},
  abstract  = {In many natural language processing (NLP) tasks, a document is commonly modeled
               as a bag of words using the term frequency-inverse document frequency (TF-IDF)
               vector. One major shortcoming of the frequency-based TF-IDF feature vector is
               that it ignores word orders that carry syntactic and semantic relationships
               among the words in a document. This paper proposes a novel distributed vector
               representation of a document, which will be labeled as DV-LSTM, and is derived
               from the result of adapting a long short-term memory recurrent neural network
               language model by the document. DV-LSTM is expected to capture some high-level
               sequential information in the document, which other current document
               representations fail to do. It was evaluated in document genre classification
               in the Brown Corpus and the BNC Baby Corpus. The results show that DV-LSTM
               significantly outperforms TF-IDF vector and paragraph vector (PV-DM) in most
               cases, and their combinations may further improve the classification
               performance.},
  url       = {http://www.aclweb.org/anthology/E17-2073},
}

