@inproceedings{shi-knight:2017:Short,
  author    = {Shi, Xing and Knight, Kevin},
  title     = {Speeding Up {Neural Machine Translation} Decoding by Shrinking Run-time Vocabulary},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {574--579},
  abstract  = {We speed up Neural Machine Translation (NMT) decoding by shrinking run-time
	target vocabulary. We experiment with two shrinking approaches: Locality
	Sensitive Hashing (LSH) and word alignments. Using the latter method, we get a
	2x overall speed-up over a highly-optimized GPU implementation, without hurting
	BLEU. On certain low-resource language pairs, the same methods improve BLEU by
	0.5 points. We also report a negative result for LSH on GPUs, due to relatively
	large overhead, though it was successful on CPUs. Compared with Locality
	Sensitive Hashing (LSH), decoding with word alignments is GPU-friendly,
	orthogonal to existing speedup methods and more robust across language pairs.},
  doi       = {10.18653/v1/P17-2091},
  url       = {http://aclweb.org/anthology/P17-2091},
}

