@inproceedings{bastawisy-elmahdy-2017-multi,
title = "Multi-Lingual Phrase-Based Statistical Machine Translation for {A}rabic-{E}nglish",
author = "Bastawisy, Ahmed and
Elmahdy, Mohamed",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017",
month = sep,
year = "2017",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd.",
url = "https://doi.org/10.26615/978-954-452-049-6_013",
doi = "10.26615/978-954-452-049-6_013",
pages = "86--89",
abstract = "In this paper, we implement a multilingual Statistical Machine Translation (SMT) system for Arabic-English Translation. Arabic Text can be categorized into standard and dialectal Arabic. These two forms of Arabic differ significantly. Different mono-lingual and multi-lingual hybrid SMT approaches are compared. Mono-lingual systems do always results in better translation accuracy in one Arabic form and poor accuracy in the other. Multi-lingual SMT models that are trained with pooled parallel MSA/dialectal data result in better accuracy. However, since the available parallel MSA data are much larger compared to dialectal data, multilingual models are biased to MSA. We propose in the work, a multi-lingual combination of different mono-lingual systems using an Arabic form classifier. The outcome of the classier directs the system to use the appropriate mono-lingual models (standard, dialectal, or mixture). Testing the different SMT systems shows that the proposed classifier-based SMT system outperforms mono-lingual and data pooled multi-lingual systems.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bastawisy-elmahdy-2017-multi">
<titleInfo>
<title>Multi-Lingual Phrase-Based Statistical Machine Translation for Arabic-English</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Bastawisy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Elmahdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we implement a multilingual Statistical Machine Translation (SMT) system for Arabic-English Translation. Arabic Text can be categorized into standard and dialectal Arabic. These two forms of Arabic differ significantly. Different mono-lingual and multi-lingual hybrid SMT approaches are compared. Mono-lingual systems do always results in better translation accuracy in one Arabic form and poor accuracy in the other. Multi-lingual SMT models that are trained with pooled parallel MSA/dialectal data result in better accuracy. However, since the available parallel MSA data are much larger compared to dialectal data, multilingual models are biased to MSA. We propose in the work, a multi-lingual combination of different mono-lingual systems using an Arabic form classifier. The outcome of the classier directs the system to use the appropriate mono-lingual models (standard, dialectal, or mixture). Testing the different SMT systems shows that the proposed classifier-based SMT system outperforms mono-lingual and data pooled multi-lingual systems.</abstract>
<identifier type="citekey">bastawisy-elmahdy-2017-multi</identifier>
<identifier type="doi">10.26615/978-954-452-049-6_013</identifier>
<part>
<date>2017-09</date>
<extent unit="page">
<start>86</start>
<end>89</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-Lingual Phrase-Based Statistical Machine Translation for Arabic-English
%A Bastawisy, Ahmed
%A Elmahdy, Mohamed
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017
%D 2017
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F bastawisy-elmahdy-2017-multi
%X In this paper, we implement a multilingual Statistical Machine Translation (SMT) system for Arabic-English Translation. Arabic Text can be categorized into standard and dialectal Arabic. These two forms of Arabic differ significantly. Different mono-lingual and multi-lingual hybrid SMT approaches are compared. Mono-lingual systems do always results in better translation accuracy in one Arabic form and poor accuracy in the other. Multi-lingual SMT models that are trained with pooled parallel MSA/dialectal data result in better accuracy. However, since the available parallel MSA data are much larger compared to dialectal data, multilingual models are biased to MSA. We propose in the work, a multi-lingual combination of different mono-lingual systems using an Arabic form classifier. The outcome of the classier directs the system to use the appropriate mono-lingual models (standard, dialectal, or mixture). Testing the different SMT systems shows that the proposed classifier-based SMT system outperforms mono-lingual and data pooled multi-lingual systems.
%R 10.26615/978-954-452-049-6_013
%U https://doi.org/10.26615/978-954-452-049-6_013
%P 86-89
Markdown (Informal)
[Multi-Lingual Phrase-Based Statistical Machine Translation for Arabic-English](https://doi.org/10.26615/978-954-452-049-6_013) (Bastawisy & Elmahdy, RANLP 2017)
ACL