% Short paper from the Eighth International Joint Conference on Natural
% Language Processing, 2017 (Anthology ID I17-2006, per the url field).
% The month uses the predefined BibTeX macro so styles can format/localize it.
@InProceedings{patel-bhattacharyya:2017:I17-2,
  author    = {Patel, Kevin and Bhattacharyya, Pushpak},
  title     = {Towards Lower Bounds on Number of Dimensions for Word Embeddings},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  month     = nov,
  year      = {2017},
  address   = {Taipei, Taiwan},
  publisher = {Asian Federation of Natural Language Processing},
  pages     = {31--36},
  abstract  = {Word embeddings are a relatively new addition to the modern NLP researcher's
	toolkit. However, unlike other tools, word embeddings are used in a black box
	manner. There are very few studies regarding various hyperparameters. One such
	hyperparameter is the dimension of word embeddings. They are rather decided
	based on a rule of thumb: in the range 50 to 300. In this paper, we show that
	the dimension should instead be chosen based on corpus statistics. More
	specifically, we show that the number of pairwise equidistant words of the
	corpus vocabulary (as defined by some distance/similarity metric) gives a lower
	bound on the number of dimensions, and going below this bound results in
	degradation of quality of learned word embeddings. Through our evaluations on
	standard word embedding evaluation tasks, we show that for dimensions higher
	than or equal to the bound, we get better results as compared to the ones below
	it.},
  url       = {http://www.aclweb.org/anthology/I17-2006}
}

