% Conference paper from the RANLP 2017 proceedings.
% The DOI is stored bare in `doi`; the resolver link is kept in `url` for
% styles and tools that only read `url`.
@InProceedings{almansor-alani:2017:RANLP,
  author    = {Almansor, Ebtesam H. and Al-Ani, Ahmed},
  title     = {Translating Dialectal {Arabic} as Low Resource Language using Word Embedding},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  pages     = {52--57},
  abstract  = {A number of machine translation methods have been proposed in recent years to
    deal with the increasingly important problem of automatic translation between
    texts of different languages or languages and their dialects.
    These methods have produced promising results when applied to some of the
    widely studied languages. Existing translation methods are mainly implemented
    using rule-based and static machine translation approaches.
    Rule based approaches utilize language translation rules that can either be
    constructed by an expert, which is quite difficult when dealing with dialects,
    or rely on rule construction algorithms, which require very large parallel
    datasets.
    Statistical approaches also require large parallel datasets to build the
    translation models.
    However, large parallel datasets do not exist for languages with low
    resources, such as the Arabic language and its dialects. In this paper we
    propose an algorithm that attempts to overcome this limitation, and apply it to
    translate the Egyptian dialect (EGY) to Modern Standard Arabic (MSA).
    Monolingual corpus was collected for both MSA and EGY and a relatively small
    parallel language pair set was built to train the models. The proposed method
    utilizes Word embedding as it requires monolingual data rather than parallel
    corpus. Both Continuous Bag of Words and Skip-gram were used to build word
    vectors. The proposed method was validated on four different datasets using a
    four-fold cross validation approach.},
  doi       = {10.26615/978-954-452-049-6_008},
  url       = {https://doi.org/10.26615/978-954-452-049-6_008}
}

