% Note: month uses the predefined BibTeX macro (unquoted) so styles can
% abbreviate/localize it; the acronym in the title is braced so that
% sentence-casing styles keep its capitals.
@InProceedings{bakliwal-vv-jawahar:2016:WSSANLP2016,
  author    = {Bakliwal, Priyam  and  V V, Devadath  and  Jawahar, C V},
  title     = {Align Me: A framework to generate Parallel Corpus Using {OCRs} and Bilingual Dictionaries},
  booktitle = {Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {183--187},
  abstract  = {Multilingual language processing tasks like statistical machine translation and
	cross language information retrieval rely mainly on availability of accurate
	parallel corpora. Manual construction of such corpus can be extremely expensive
	and time consuming. In this paper we present a simple yet efficient method to
	generate huge amount of reasonably accurate parallel corpus with minimal user
	efforts. We utilize the availability of large number of English books and their
	corresponding translations in other languages to build parallel corpus. Optical
	Character Recognizing systems are used to digitize such books. We propose a
	robust dictionary based parallel corpus generation system for alignment of
	multilingual text at different levels of granularity (sentence, paragraphs,
	etc). We show the performance of our proposed method on a manually aligned
	dataset of 300 Hindi-English sentences and 100 English-Malayalam sentences.},
  url       = {http://aclweb.org/anthology/W16-3719}
}

