@inproceedings{aliwy-etal-2020-arabic,
title = "{A}rabic Dialects Identification for All {A}rabic countries",
author = "Aliwy, Ahmed and
Taher, Hawraa and
AboAltaheen, Zena",
editor = "Zitouni, Imed and
Abdul-Mageed, Muhammad and
Bouamor, Houda and
Bougares, Fethi and
El-Haj, Mahmoud and
Tomeh, Nadi and
Zaghouani, Wajdi",
booktitle = "Proceedings of the Fifth Arabic Natural Language Processing Workshop",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wanlp-1.32/",
pages = "302--307",
abstract = {Arabic dialects are one of the three main variants of the Arabic language (Classical Arabic, Modern Standard Arabic and dialectal Arabic). They have many variants according to the country, city (province) or town. In this paper, several techniques with multiple algorithms are applied to Arabic dialect identification, from noise removal through to the classification task, using all Arabic countries as 21 classes. Three types of classifiers (Na{\"i}ve Bayes, Logistic Regression, and Decision Tree) are combined using voting with two different methodologies. In addition, a clustering technique is used to decrease the noise that results from the presence of MSA tweets in the training data set. The F-measure results were 27.17, 41.34 and 52.38 for the first methodology without clustering, the second methodology without clustering, and the second methodology with clustering, respectively. The data set used is the NADI shared task data set.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="aliwy-etal-2020-arabic">
<titleInfo>
<title>Arabic Dialects Identification for All Arabic countries</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Aliwy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hawraa</namePart>
<namePart type="family">Taher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zena</namePart>
<namePart type="family">AboAltaheen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Arabic Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="family">Abdul-Mageed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fethi</namePart>
<namePart type="family">Bougares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mahmoud</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Arabic dialects are one of the three main variants of the Arabic language (Classical Arabic, Modern Standard Arabic and dialectal Arabic). They have many variants according to the country, city (province) or town. In this paper, several techniques with multiple algorithms are applied to Arabic dialect identification, from noise removal through to the classification task, using all Arabic countries as 21 classes. Three types of classifiers (Naïve Bayes, Logistic Regression, and Decision Tree) are combined using voting with two different methodologies. In addition, a clustering technique is used to decrease the noise that results from the presence of MSA tweets in the training data set. The F-measure results were 27.17, 41.34 and 52.38 for the first methodology without clustering, the second methodology without clustering, and the second methodology with clustering, respectively. The data set used is the NADI shared task data set.</abstract>
<identifier type="citekey">aliwy-etal-2020-arabic</identifier>
<location>
<url>https://aclanthology.org/2020.wanlp-1.32/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>302</start>
<end>307</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Arabic Dialects Identification for All Arabic countries
%A Aliwy, Ahmed
%A Taher, Hawraa
%A AboAltaheen, Zena
%Y Zitouni, Imed
%Y Abdul-Mageed, Muhammad
%Y Bouamor, Houda
%Y Bougares, Fethi
%Y El-Haj, Mahmoud
%Y Tomeh, Nadi
%Y Zaghouani, Wajdi
%S Proceedings of the Fifth Arabic Natural Language Processing Workshop
%D 2020
%8 December
%I Association for Computational Linguistics
%C Barcelona, Spain (Online)
%F aliwy-etal-2020-arabic
%X Arabic dialects are one of the three main variants of the Arabic language (Classical Arabic, Modern Standard Arabic and dialectal Arabic). They have many variants according to the country, city (province) or town. In this paper, several techniques with multiple algorithms are applied to Arabic dialect identification, from noise removal through to the classification task, using all Arabic countries as 21 classes. Three types of classifiers (Naïve Bayes, Logistic Regression, and Decision Tree) are combined using voting with two different methodologies. In addition, a clustering technique is used to decrease the noise that results from the presence of MSA tweets in the training data set. The F-measure results were 27.17, 41.34 and 52.38 for the first methodology without clustering, the second methodology without clustering, and the second methodology with clustering, respectively. The data set used is the NADI shared task data set.
%U https://aclanthology.org/2020.wanlp-1.32/
%P 302-307
Markdown (Informal)
[Arabic Dialects Identification for All Arabic countries](https://aclanthology.org/2020.wanlp-1.32/) (Aliwy et al., WANLP 2020)
ACL