% VarDial3 (COLING 2016 workshop) paper; text outside the entry is ignored by BibTeX.
@InProceedings{popovic-EtAl:2016:VarDial3,
  author    = {Popovi{\'c}, Maja and Cholakov, Kostadin and Kordoni, Valia and Ljube{\v{s}}i{\'c}, Nikola},
  title     = {Enlarging Scarce In-domain {English-Croatian} Corpus for {SMT} of {MOOCs} Using {Serbian}},
  booktitle = {Proceedings of the Third Workshop on {NLP} for Similar Languages, Varieties and Dialects ({VarDial3})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {97--105},
  abstract  = {Massive Open Online Courses have been growing rapidly in size and impact. Yet
	the language barrier constitutes a major growth impediment in reaching out all
	people and educating all citizens. A vast majority of educational material is
	available only in English, and state-of-the-art machine translation  systems
	still have not been tailored for this peculiar genre. In addition, a mere
	collection of appropriate in-domain training material is a challenging task. In
	this work, we investigate statistical machine translation of lecture subtitles
	from English into Croatian, which is  morphologically rich and generally weakly
	supported, especially for the educational domain. We show that results
	comparable with publicly available systems trained on much larger data can be
	achieved if a small in-domain training set is used in combination with
	additional in-domain corpus originating from the closely related Serbian
	language.},
  url       = {http://aclweb.org/anthology/W16-4813}
}

