@InProceedings{press-wolf:2017:EACLshort,
  author    = {Press, Ofir and Wolf, Lior},
  title     = {Using the Output Embedding to Improve Language Models},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers},
  month     = {April},
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {157--163},
  abstract  = {We study the topmost weight matrix of neural network language models. We show
               that this matrix constitutes a valid word embedding. When training language
               models, we recommend tying the input embedding and this output embedding. We
               analyze the resulting update rules and show that the tied embedding evolves in
               a more similar way to the output embedding than to the input embedding in the
               untied model. We also offer a new method of regularizing the output embedding.
               Our methods lead to a significant reduction in perplexity, as we are able to
               show on a variety of neural network language models. Finally, we show that
               weight tying can reduce the size of neural translation models to less than half
               of their original size without harming their performance.},
  url       = {http://www.aclweb.org/anthology/E17-2025}
}