@inproceedings{lichouri-etal-2021-arabic,
title = "{A}rabic Dialect Identification based on a Weighted Concatenation of {TF}-{IDF} Features",
author = "Lichouri, Mohamed and
Abbas, Mourad and
Lounnas, Khaled and
Benaziz, Besma and
Zitouni, Aicha",
editor = "Habash, Nizar and
Bouamor, Houda and
Hajj, Hazem and
Magdy, Walid and
Zaghouani, Wajdi and
Bougares, Fethi and
Tomeh, Nadi and
Abu Farha, Ibrahim and
Touileb, Samia",
booktitle = "Proceedings of the Sixth Arabic Natural Language Processing Workshop",
month = apr,
year = "2021",
address = "Kyiv, Ukraine (Virtual)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.wanlp-1.33",
pages = "282--286",
abstract = "In this paper, we analyze the impact of the weighted concatenation of TF-IDF features for the Arabic Dialect Identification task while we participated in the NADI2021 shared task. This study is performed for two subtasks: subtask 1.1 (country-level MSA) and subtask 1.2 (country-level DA) identification. The classifiers supporting our comparative study are Linear Support Vector Classification (LSVC), Linear Regression (LR), Perceptron, Stochastic Gradient Descent (SGD), Passive Aggressive (PA), Complement Naive Bayes (CNB), MutliLayer Perceptron (MLP), and RidgeClassifier. In the evaluation phase, our system gives F1 scores of 14.87{\%} and 21.49{\%}, for country-level MSA and DA identification respectively, which is very close to the average F1 scores achieved by the submitted systems and recorded for both subtasks (18.70{\%} and 24.23{\%}).",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lichouri-etal-2021-arabic">
<titleInfo>
<title>Arabic Dialect Identification based on a Weighted Concatenation of TF-IDF Features</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Lichouri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mourad</namePart>
<namePart type="family">Abbas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khaled</namePart>
<namePart type="family">Lounnas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Besma</namePart>
<namePart type="family">Benaziz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aicha</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Arabic Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hazem</namePart>
<namePart type="family">Hajj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walid</namePart>
<namePart type="family">Magdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fethi</namePart>
<namePart type="family">Bougares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ibrahim</namePart>
<namePart type="family">Abu Farha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samia</namePart>
<namePart type="family">Touileb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Kyiv, Ukraine (Virtual)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we analyze the impact of the weighted concatenation of TF-IDF features for the Arabic Dialect Identification task while we participated in the NADI2021 shared task. This study is performed for two subtasks: subtask 1.1 (country-level MSA) and subtask 1.2 (country-level DA) identification. The classifiers supporting our comparative study are Linear Support Vector Classification (LSVC), Linear Regression (LR), Perceptron, Stochastic Gradient Descent (SGD), Passive Aggressive (PA), Complement Naive Bayes (CNB), MutliLayer Perceptron (MLP), and RidgeClassifier. In the evaluation phase, our system gives F1 scores of 14.87% and 21.49%, for country-level MSA and DA identification respectively, which is very close to the average F1 scores achieved by the submitted systems and recorded for both subtasks (18.70% and 24.23%).</abstract>
<identifier type="citekey">lichouri-etal-2021-arabic</identifier>
<location>
<url>https://aclanthology.org/2021.wanlp-1.33</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>282</start>
<end>286</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Arabic Dialect Identification based on a Weighted Concatenation of TF-IDF Features
%A Lichouri, Mohamed
%A Abbas, Mourad
%A Lounnas, Khaled
%A Benaziz, Besma
%A Zitouni, Aicha
%Y Habash, Nizar
%Y Bouamor, Houda
%Y Hajj, Hazem
%Y Magdy, Walid
%Y Zaghouani, Wajdi
%Y Bougares, Fethi
%Y Tomeh, Nadi
%Y Abu Farha, Ibrahim
%Y Touileb, Samia
%S Proceedings of the Sixth Arabic Natural Language Processing Workshop
%D 2021
%8 April
%I Association for Computational Linguistics
%C Kyiv, Ukraine (Virtual)
%F lichouri-etal-2021-arabic
%X In this paper, we analyze the impact of the weighted concatenation of TF-IDF features for the Arabic Dialect Identification task while we participated in the NADI2021 shared task. This study is performed for two subtasks: subtask 1.1 (country-level MSA) and subtask 1.2 (country-level DA) identification. The classifiers supporting our comparative study are Linear Support Vector Classification (LSVC), Linear Regression (LR), Perceptron, Stochastic Gradient Descent (SGD), Passive Aggressive (PA), Complement Naive Bayes (CNB), MutliLayer Perceptron (MLP), and RidgeClassifier. In the evaluation phase, our system gives F1 scores of 14.87% and 21.49%, for country-level MSA and DA identification respectively, which is very close to the average F1 scores achieved by the submitted systems and recorded for both subtasks (18.70% and 24.23%).
%U https://aclanthology.org/2021.wanlp-1.33
%P 282-286
Markdown (Informal)
[Arabic Dialect Identification based on a Weighted Concatenation of TF-IDF Features](https://aclanthology.org/2021.wanlp-1.33) (Lichouri et al., WANLP 2021)
ACL