@inproceedings{chen-EtAl:2016:COLING1,
  author    = {Chen, Kuan-Yu and Liu, Shih-Hung and Chen, Berlin and Wang, Hsin-Min},
  title     = {Learning to Distill: The Essence Vector Modeling Framework},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The {COLING} 2016 Organizing Committee},
  pages     = {358--368},
  abstract  = {In the context of natural language processing, representation learning has
               emerged as a newly active research subject because of its excellent performance
               in many applications. Learning representations of words is a pioneering study in
               this school of research. However, paragraph (or sentence and document)
               embedding learning is more suitable/reasonable for some tasks, such as
               sentiment classification and document summarization. Nevertheless, as far as we
               are aware, there is only a dearth of research focusing on launching
               unsupervised paragraph embedding methods. Classic paragraph embedding methods
               infer the representation of a given paragraph by considering all of the words
               occurring in the paragraph. Consequently, those stop or function words that
               occur frequently may mislead the embedding learning process to produce a misty
               paragraph representation. Motivated by these observations, our major
               contributions are twofold. First, we propose a novel unsupervised paragraph
               embedding method, named the essence vector (EV) model, which aims at not only
               distilling the most representative information from a paragraph but also
               excluding the general background information to produce a more informative
               low-dimensional vector representation for the paragraph. We evaluate the
               proposed EV model on benchmark sentiment classification and multi-document
               summarization tasks. The experimental results demonstrate the effectiveness and
               applicability of the proposed embedding method. Second, in view of the
               increasing importance of spoken content processing, an extension of the EV
               model, named the denoising essence vector (D-EV) model, is proposed. The D-EV
               model not only inherits the advantages of the EV model but also can infer a
               more robust representation for a given spoken paragraph against imperfect
               speech recognition. The utility of the D-EV model is evaluated on a spoken
               document summarization task, confirming the effectiveness of the proposed
               embedding method in relation to several well-practiced and state-of-the-art
               summarization methods.},
  url       = {http://aclweb.org/anthology/C16-1035},
}

