@inproceedings{dhaou-lejeune-2020-comparison,
title = "Comparison between Voting Classifier and Deep Learning methods for {A}rabic Dialect Identification",
author = {Dhaou, Ghoul and
Lejeune, Ga{\"e}l},
booktitle = "Proceedings of the Fifth Arabic Natural Language Processing Workshop",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wanlp-1.23",
pages = "243--249",
abstract = "In this paper, we present three methods developed for the NADI shared task on Arabic Dialect Identification for tweets. The first and the second method use respectively a machine learning model based on a Voting Classifier with words and character level features and a deep learning model at word level. The third method uses only character-level features. We explored different text representation such as Tf-idf (first model) and word embeddings (second model). The Voting Classifier was the most powerful prediction model, achieving the best macro-average F1 score of 18.8{\%} and an accuracy of 36.54{\%} on the official test. Our model ranked 9 on the challenge and in conclusion we propose some ideas to improve its results.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dhaou-lejeune-2020-comparison">
<titleInfo>
<title>Comparison between Voting Classifier and Deep Learning methods for Arabic Dialect Identification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ghoul</namePart>
<namePart type="family">Dhaou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaël</namePart>
<namePart type="family">Lejeune</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Arabic Natural Language Processing Workshop</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present three methods developed for the NADI shared task on Arabic Dialect Identification for tweets. The first and the second method use respectively a machine learning model based on a Voting Classifier with words and character level features and a deep learning model at word level. The third method uses only character-level features. We explored different text representation such as Tf-idf (first model) and word embeddings (second model). The Voting Classifier was the most powerful prediction model, achieving the best macro-average F1 score of 18.8% and an accuracy of 36.54% on the official test. Our model ranked 9 on the challenge and in conclusion we propose some ideas to improve its results.</abstract>
<identifier type="citekey">dhaou-lejeune-2020-comparison</identifier>
<location>
<url>https://aclanthology.org/2020.wanlp-1.23</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>243</start>
<end>249</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Comparison between Voting Classifier and Deep Learning methods for Arabic Dialect Identification
%A Dhaou, Ghoul
%A Lejeune, Gaël
%S Proceedings of the Fifth Arabic Natural Language Processing Workshop
%D 2020
%8 December
%I Association for Computational Linguistics
%C Barcelona, Spain (Online)
%F dhaou-lejeune-2020-comparison
%X In this paper, we present three methods developed for the NADI shared task on Arabic Dialect Identification for tweets. The first and the second method use respectively a machine learning model based on a Voting Classifier with words and character level features and a deep learning model at word level. The third method uses only character-level features. We explored different text representation such as Tf-idf (first model) and word embeddings (second model). The Voting Classifier was the most powerful prediction model, achieving the best macro-average F1 score of 18.8% and an accuracy of 36.54% on the official test. Our model ranked 9 on the challenge and in conclusion we propose some ideas to improve its results.
%U https://aclanthology.org/2020.wanlp-1.23
%P 243-249
Markdown (Informal)
[Comparison between Voting Classifier and Deep Learning methods for Arabic Dialect Identification](https://aclanthology.org/2020.wanlp-1.23) (Dhaou & Lejeune, WANLP 2020)
ACL