@inproceedings{ghoul-lejeune-2019-michael,
  title     = {{MICHAEL}: Mining Character-level Patterns for {A}rabic Dialect Identification ({MADAR} Challenge)},
  author    = {Ghoul, Dhaou and
               Lejeune, Ga{\"e}l},
  editor    = {El-Hajj, Wassim and
               Belguith, Lamia Hadrich and
               Bougares, Fethi and
               Magdy, Walid and
               Zitouni, Imed and
               Tomeh, Nadi and
               El-Haj, Mahmoud and
               Zaghouani, Wajdi},
  booktitle = {Proceedings of the Fourth Arabic Natural Language Processing Workshop},
  month     = aug,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W19-4627},
  doi       = {10.18653/v1/W19-4627},
  pages     = {229--233},
  abstract  = {We present MICHAEL, a simple lightweight method for automatic Arabic Dialect Identification on the MADAR travel domain Dialect Identification (DID). MICHAEL uses simple character-level features in order to perform a pre-processing free classification. More precisely, Character N-grams extracted from the original sentences are used to train a Multinomial Naive Bayes classifier. This system achieved an official score (accuracy) of 53.25{\%} with 1{\textless}=N{\textless}=3 but showed a much better result with character 4-grams (62.17{\%} accuracy).},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ghoul-lejeune-2019-michael">
<titleInfo>
<title>MICHAEL: Mining Character-level Patterns for Arabic Dialect Identification (MADAR Challenge)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dhaou</namePart>
<namePart type="family">Ghoul</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaël</namePart>
<namePart type="family">Lejeune</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Arabic Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wassim</namePart>
<namePart type="family">El-Hajj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lamia</namePart>
<namePart type="given">Hadrich</namePart>
<namePart type="family">Belguith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fethi</namePart>
<namePart type="family">Bougares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walid</namePart>
<namePart type="family">Magdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mahmoud</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present MICHAEL, a simple lightweight method for automatic Arabic Dialect Identification on the MADAR travel domain Dialect Identification (DID). MICHAEL uses simple character-level features in order to perform a pre-processing free classification. More precisely, Character N-grams extracted from the original sentences are used to train a Multinomial Naive Bayes classifier. This system achieved an official score (accuracy) of 53.25% with 1&lt;=N&lt;=3 but showed a much better result with character 4-grams (62.17% accuracy).</abstract>
<identifier type="citekey">ghoul-lejeune-2019-michael</identifier>
<identifier type="doi">10.18653/v1/W19-4627</identifier>
<location>
<url>https://aclanthology.org/W19-4627</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>229</start>
<end>233</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MICHAEL: Mining Character-level Patterns for Arabic Dialect Identification (MADAR Challenge)
%A Ghoul, Dhaou
%A Lejeune, Gaël
%Y El-Hajj, Wassim
%Y Belguith, Lamia Hadrich
%Y Bougares, Fethi
%Y Magdy, Walid
%Y Zitouni, Imed
%Y Tomeh, Nadi
%Y El-Haj, Mahmoud
%Y Zaghouani, Wajdi
%S Proceedings of the Fourth Arabic Natural Language Processing Workshop
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F ghoul-lejeune-2019-michael
%X We present MICHAEL, a simple lightweight method for automatic Arabic Dialect Identification on the MADAR travel domain Dialect Identification (DID). MICHAEL uses simple character-level features in order to perform a pre-processing free classification. More precisely, Character N-grams extracted from the original sentences are used to train a Multinomial Naive Bayes classifier. This system achieved an official score (accuracy) of 53.25% with 1<=N<=3 but showed a much better result with character 4-grams (62.17% accuracy).
%R 10.18653/v1/W19-4627
%U https://aclanthology.org/W19-4627
%U https://doi.org/10.18653/v1/W19-4627
%P 229-233
Markdown (Informal)
[MICHAEL: Mining Character-level Patterns for Arabic Dialect Identification (MADAR Challenge)](https://aclanthology.org/W19-4627) (Ghoul & Lejeune, WANLP 2019)
ACL