% Conference paper in the RANLP 2017 proceedings.
% Names are stored as "Last, First"; month uses the standard three-letter macro;
% the DOI is stored bare in `doi` and the resolver link is kept in `url`.
% NOTE(review): the abstract is left untouched; it is presumably quoted verbatim
% from the publisher, so confirm against the paper before correcting its wording.
@InProceedings{bastawisy-elmahdy:2017:RANLP,
  author    = {Bastawisy, Ahmed and Elmahdy, Mohamed},
  title     = {Multi-Lingual Phrase-Based Statistical Machine Translation for {Arabic}-{English}},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  pages     = {86--89},
  abstract  = {In this paper, we implement a multilingual Statistical Machine Translation
	(SMT) system for Arabic-English Translation. Arabic Text can be categorized
	into standard and dialectal Arabic. These two forms of Arabic differ
	significantly. Different mono-lingual and multi-lingual hybrid SMT approaches
	are compared. Mono-lingual systems do always results in better translation
	accuracy in one Arabic form and poor accuracy in the other. Multi-lingual SMT
	models that are trained with pooled parallel MSA/dialectal data result in
	better accuracy. However, since the available parallel MSA data are much larger
	compared to dialectal data, multilingual models are biased to MSA. We propose
	in the work, a multi-lingual combination of different mono-lingual systems
	using an Arabic form classifier. The outcome of the classier directs the system
	to use the appropriate mono-lingual models (standard, dialectal, or mixture).
	Testing the different SMT systems shows that the proposed classifier-based SMT
	system outperforms mono-lingual and data pooled multi-lingual systems.},
  doi       = {10.26615/978-954-452-049-6_013},
  url       = {https://doi.org/10.26615/978-954-452-049-6_013}
}