% Workshop paper (WSSANLP 2016, held with COLING 2016): sentence alignment for a
% Sinhala-Tamil parallel corpus. Language names in the title are brace-protected
% so sentence-casing styles keep their capitals; month uses the standard macro.
@InProceedings{abdulhameed-EtAl:2016:WSSANLP2016,
  author    = {Abdul Hameed, Riyafa and
               Pathirennehelage, Nadeeshani and
               Ihalapathirana, Anusha and
               Ziyad Mohamed, Maryam and
               Ranathunga, Surangika and
               Jayasena, Sanath and
               Dias, Gihan and
               Fernando, Sandareka},
  title     = {Automatic Creation of a Sentence Aligned {Sinhala-Tamil} Parallel Corpus},
  booktitle = {Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {124--132},
  abstract  = {A sentence aligned parallel corpus is an important prerequisite in statistical
               machine translation. However, manual creation of such a parallel corpus is time
               consuming, and requires experts fluent in both languages. Automatic creation of
               a sentence aligned parallel corpus using parallel text is the solution to this
               problem. In this paper, we present the first ever empirical evaluation carried
               out to identify the best method to automatically create a sentence aligned
               Sinhala-Tamil parallel corpus. Annual reports from Sri Lankan government
               institutions were used as the parallel text for aligning. Despite both Sinhala
               and Tamil being under-resourced languages, we were able to achieve an F-score
               value of 0.791 using a hybrid approach that makes use of a bilingual
               dictionary.},
  url       = {http://aclweb.org/anthology/W16-3713},
}

