% Shared-task system paper from the BUCC 2017 workshop (ACL Anthology W17-2511).
% - `month` uses the predefined BibTeX macro `aug` (unquoted) so the style
%   controls how the month is printed.
% - `{BUCC2017}` is brace-protected so styles that sentence-case titles keep
%   the acronym capitalized.
@InProceedings{mahata-das-bandyopadhyay:2017:BUCC,
  author    = {Mahata, Sainik and Das, Dipankar and Bandyopadhyay, Sivaji},
  title     = {{BUCC2017}: A Hybrid Approach for Identifying Parallel Sentences in Comparable Corpora},
  booktitle = {Proceedings of the 10th Workshop on Building and Using Comparable Corpora},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {56--59},
  abstract  = {A Statistical Machine Translation (SMT) system is always trained using large
	parallel corpus to produce effective translation. Not only is the corpus
	scarce, it also involves a lot of manual labor and cost. Parallel corpus can be
	prepared by employing comparable corpora where a pair of corpora is in two
	different languages pointing to the same domain. In the present work, we try to
	build a parallel corpus for French-English language pair from a given
	comparable corpus. The data and the problem set are provided as part of the
	shared task organized by BUCC 2017. We have proposed a system that first
	translates the sentences by heavily relying on Moses and then group the
	sentences based on sentence length similarity. Finally, the one to one sentence
	selection was done based on Cosine Similarity algorithm.},
  url       = {http://www.aclweb.org/anthology/W17-2511}
}

