% ACL 2017 short paper by Marie and Fujita (pseudo-parallel sentence extraction).
% The month is given as the predefined macro (jul) so the bibliography style
% decides how to render it; the DOI is the persistent identifier, the URL is
% kept as originally recorded.
@InProceedings{marie-fujita:2017:Short,
  author    = {Marie, Benjamin and Fujita, Atsushi},
  title     = {Efficient Extraction of Pseudo-Parallel Sentences from Raw Monolingual Data Using Word Embeddings},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {392--398},
  abstract  = {We propose a new method for extracting pseudo-parallel sentences from a pair of
               large monolingual corpora, without relying on any document-level information.
               Our method first exploits word embeddings in order to efficiently evaluate
               trillions of candidate sentence pairs and then a classifier to find the most
               reliable ones. We report significant improvements in domain adaptation for
               statistical machine translation when using a translation model trained on the
               sentence pairs extracted from in-domain monolingual corpora.},
  doi       = {10.18653/v1/P17-2062},
  url       = {http://aclweb.org/anthology/P17-2062},
}

