@inproceedings{labeau-allauzen:2017:EACLshort,
  author    = {Labeau, Matthieu and Allauzen, Alexandre},
  title     = {An Experimental Analysis of {Noise-Contrastive Estimation}: The Noise Distribution Matters},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {15--20},
  abstract  = {Noise Contrastive Estimation (NCE) is a learning procedure that is regularly
	used to train neural language models, since it avoids the computational
	bottleneck caused by the output softmax. In this paper, we attempt to explain
	some of the weaknesses of this objective function, and to draw directions for
	further developments. Experiments on a small task show the issues raised by an
	unigram noise distribution, and that a context dependent noise distribution,
	such as the bigram distribution, can solve these issues and provide stable and
	data-efficient learning.},
  url       = {https://www.aclweb.org/anthology/E17-2003},
}

