% Workshop paper (RepL4NLP 2017) on bilingual projections of word embeddings
% for vocabulary expansion in machine translation.
@InProceedings{madhyastha-espanabonet:2017:RepL4NLP,
  author    = {Madhyastha, Pranava Swaroop and Espa{\~n}a-Bonet, Cristina},
  title     = {Learning Bilingual Projections of Embeddings for Vocabulary Expansion in Machine Translation},
  booktitle = {Proceedings of the 2nd Workshop on Representation Learning for {NLP}},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {139--145},
  abstract  = {We propose a simple log-bilinear softmax-based model to deal with
               vocabulary expansion in machine translation. Our model uses word
               embeddings trained on significantly large unlabelled monolingual
               corpora and learns over a fairly small, word-to-word bilingual
               dictionary. Given an out-of-vocabulary source word, the model
               generates a probabilistic list of possible translations in the
               target language using the trained bilingual embeddings. We
               integrate these translation options into a standard phrase-based
               statistical machine translation system and obtain consistent
               improvements in translation quality on the English--Spanish
               language pair. When tested over an out-of-domain testset, we get
               a significant improvement of 3.9 BLEU points.},
  url       = {http://www.aclweb.org/anthology/W17-2617},
}

