% Short paper in the ACL 2017 proceedings (Volume 2: Short Papers), pp. 167--171.
% The month is given as the predefined macro `jul` (unbraced) so that the
% bibliography style decides how to abbreviate or localize it.
@InProceedings{melamud-goldberger:2017:Short,
  author    = {Melamud, Oren and Goldberger, Jacob},
  title     = {Information-Theory Interpretation of the Skip-Gram Negative-Sampling Objective Function},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {167--171},
  abstract  = {In this paper we define a measure of dependency between two random variables,
    based on the Jensen-Shannon (JS) divergence between their joint distribution
    and the product of their marginal distributions. Then, we show that word2vec's
    skip-gram with negative sampling embedding algorithm finds the optimal
    low-dimensional approximation of this JS dependency measure between the words
    and their contexts. The gap between the optimal score and the low-dimensional
    approximation is demonstrated on a standard text corpus.},
  url       = {http://aclweb.org/anthology/P17-2026}
}

