@inproceedings{ye-EtAl:2017:Long2,
  author    = {Ye, Jianbo and Li, Yanran and Wu, Zhaohui and Wang, James Z. and Li, Wenjie and Li, Jia},
  title     = {Determining Gains Acquired from Word Embedding Quantitatively Using Discrete Distribution Clustering},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {1847--1856},
  doi       = {10.18653/v1/P17-1169},
  url       = {http://aclweb.org/anthology/P17-1169},
  abstract  = {Word embeddings have become widely-used in document analysis. While a large
    number of models for mapping words to vector spaces have been developed, it
    remains undetermined how much net gain can be achieved over traditional
    approaches based on bag-of-words. In this paper, we propose a new document
    clustering approach by combining any word embedding with a state-of-the-art
    algorithm for clustering empirical distributions. By using the Wasserstein
    distance between distributions, the word-to-word semantic relationship is taken
    into account in a principled way. The new clustering method is easy to use and
    consistently outperforms other methods on a variety of data sets. More
    importantly, the method provides an effective framework for determining when
    and how much word embeddings contribute to document analysis. Experimental
    results with multiple embedding models are reported.},
}

